Skip to content

Commit

Permalink
add support for scored named queries (opensearch-project#11626)
Browse files Browse the repository at this point in the history
OpenSearch already supports naming queries; each hit in the returned results then includes a list of the names of the
queries it matched. However, one use case when doing hybrid search with query text and dense vectors is to determine
the individual score for each query type. This is very useful for further analysis and for building offline models that
generate better weights for the ranking score. Hence this change adds a feature that lets the client request the score
for each matched query.

---------

Signed-off-by: Dharin Shah <8616130+Dharin-shah@users.noreply.github.com>
Signed-off-by: Dharin Shah <Dharin-shah@users.noreply.github.com>
Co-authored-by: Dharin Shah <8616130+Dharin-shah@users.noreply.github.com>
  • Loading branch information
2 people authored and Peter Alfonsi committed Mar 1, 2024
1 parent 609b373 commit 84bd2f1
Show file tree
Hide file tree
Showing 22 changed files with 672 additions and 134 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Remove concurrent segment search feature flag for GA launch ([#12074](https://github.com/opensearch-project/OpenSearch/pull/12074))
- Enable Fuzzy codec for doc id fields using a bloom filter ([#11022](https://github.com/opensearch-project/OpenSearch/pull/11022))
- [Metrics Framework] Adds support for Histogram metric ([#12062](https://github.com/opensearch-project/OpenSearch/pull/12062))
- Support for returning scores in matched queries ([#11626](https://github.com/opensearch-project/OpenSearch/pull/11626))

### Dependencies
- Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,11 @@
"search_pipeline": {
"type": "string",
"description": "The search pipeline to use to execute this request"
},
"include_named_queries_score":{
"type": "boolean",
"description":"Indicates whether hit.matched_queries should be rendered as a map that includes the name of the matched query associated with its score (true) or as an array containing the name of the matched queries (false)",
"default":false
}
},
"body":{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
setup:
- skip:
version: " - 2.12.0"
reason: "implemented for versions post 2.12.0"

---
"matched queries":
- do:
indices.create:
index: test

- do:
bulk:
refresh: true
body:
- '{ "index" : { "_index" : "test_1", "_id" : "1" } }'
- '{"field" : 1 }'
- '{ "index" : { "_index" : "test_1", "_id" : "2" } }'
- '{"field" : [1, 2] }'

- do:
search:
index: test_1
body:
query:
bool: {
should: [
{
match: {
field: {
query: 1,
_name: match_field_1
}
}
},
{
match: {
field: {
query: 2,
_name: match_field_2,
boost: 10
}
}
}
]
}

- match: {hits.total.value: 2}
- length: {hits.hits.0.matched_queries: 2}
- match: {hits.hits.0.matched_queries: [ "match_field_1", "match_field_2" ]}
- length: {hits.hits.1.matched_queries: 1}
- match: {hits.hits.1.matched_queries: [ "match_field_1" ]}

---

"matched queries with scores":
- do:
indices.create:
index: test

- do:
bulk:
refresh: true
body:
- '{ "index" : { "_index" : "test_1", "_id" : "1" } }'
- '{"field" : 1 }'
- '{ "index" : { "_index" : "test_1", "_id" : "2" } }'
- '{"field" : [1, 2] }'

- do:
search:
include_named_queries_score: true
index: test_1
body:
query:
bool: {
should: [
{
match: {
field: {
query: 1,
_name: match_field_1
}
}
},
{
match: {
field: {
query: 2,
_name: match_field_2,
boost: 10
}
}
}
]
}

- match: { hits.total.value: 2 }
- length: { hits.hits.0.matched_queries: 2 }
- match: { hits.hits.0.matched_queries.match_field_1: 1 }
- match: { hits.hits.0.matched_queries.match_field_2: 10 }
- length: { hits.hits.1.matched_queries: 1 }
- match: { hits.hits.1.matched_queries.match_field_1: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,16 @@ public void testPrimaryRelocationWhileIndexing() throws Exception {
ensureGreen("test");
AtomicInteger numAutoGenDocs = new AtomicInteger();
final AtomicBoolean finished = new AtomicBoolean(false);
Thread indexingThread = new Thread() {
@Override
public void run() {
while (finished.get() == false && numAutoGenDocs.get() < 10_000) {
IndexResponse indexResponse = client().prepareIndex("test").setId("id").setSource("field", "value").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
DeleteResponse deleteResponse = client().prepareDelete("test", "id").get();
assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult());
client().prepareIndex("test").setSource("auto", true).get();
numAutoGenDocs.incrementAndGet();
}
Thread indexingThread = new Thread(() -> {
while (finished.get() == false && numAutoGenDocs.get() < 10_000) {
IndexResponse indexResponse = client().prepareIndex("test").setId("id").setSource("field", "value").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
DeleteResponse deleteResponse = client().prepareDelete("test", "id").get();
assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult());
client().prepareIndex("test").setSource("auto", true).get();
numAutoGenDocs.incrementAndGet();
}
};
});
indexingThread.start();

ClusterState initialState = client().admin().cluster().prepareState().get().getState();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasItemInArray;
import static org.hamcrest.Matchers.hasKey;

public class MatchedQueriesIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {

Expand Down Expand Up @@ -95,15 +97,18 @@ public void testSimpleMatchedQueryFromFilteredQuery() throws Exception {
.should(rangeQuery("number").gte(2).queryName("test2"))
)
)
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("3") || hit.getId().equals("2")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test2"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test2"));
assertThat(hit.getMatchedQueryScore("test2"), equalTo(1f));
} else if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test1"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test1"));
assertThat(hit.getMatchedQueryScore("test1"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -113,15 +118,18 @@ public void testSimpleMatchedQueryFromFilteredQuery() throws Exception {
.setQuery(
boolQuery().should(rangeQuery("number").lte(2).queryName("test1")).should(rangeQuery("number").gt(2).queryName("test2"))
)
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1") || hit.getId().equals("2")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test1"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test1"));
assertThat(hit.getMatchedQueryScore("test1"), equalTo(1f));
} else if (hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test2"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test2"));
assertThat(hit.getMatchedQueryScore("test2"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -147,12 +155,15 @@ public void testSimpleMatchedQueryFromTopLevelFilter() throws Exception {
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(2));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueries(), hasItemInArray("title"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("title"));
assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f));
} else if (hit.getId().equals("2") || hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -168,12 +179,15 @@ public void testSimpleMatchedQueryFromTopLevelFilter() throws Exception {
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(2));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueries(), hasItemInArray("title"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("title"));
assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f));
} else if (hit.getId().equals("2") || hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -197,9 +211,11 @@ public void testSimpleMatchedQueryFromTopLevelFilterAndFilteredQuery() throws Ex
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1") || hit.getId().equals("2") || hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(2));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueries(), hasItemInArray("title"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("title"));
assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand Down Expand Up @@ -231,13 +247,15 @@ public void testRegExpQuerySupportsName() throws InterruptedException {

SearchResponse searchResponse = client().prepareSearch()
.setQuery(QueryBuilders.regexpQuery("title", "title1").queryName("regex"))
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 1L);

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("regex"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("regex"));
assertThat(hit.getMatchedQueryScore("regex"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -252,15 +270,17 @@ public void testPrefixQuerySupportsName() throws InterruptedException {
refresh();
indexRandomForConcurrentSearch("test1");

SearchResponse searchResponse = client().prepareSearch()
var query = client().prepareSearch()
.setQuery(QueryBuilders.prefixQuery("title", "title").queryName("prefix"))
.get();
.setIncludeNamedQueriesScore(true);
var searchResponse = query.get();
assertHitCount(searchResponse, 1L);

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("prefix"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("prefix"));
assertThat(hit.getMatchedQueryScore("prefix"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -282,8 +302,9 @@ public void testFuzzyQuerySupportsName() throws InterruptedException {

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("fuzzy"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("fuzzy"));
assertThat(hit.getMatchedQueryScore("fuzzy"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -300,13 +321,15 @@ public void testWildcardQuerySupportsName() throws InterruptedException {

SearchResponse searchResponse = client().prepareSearch()
.setQuery(QueryBuilders.wildcardQuery("title", "titl*").queryName("wildcard"))
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 1L);

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("wildcard"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("wildcard"));
assertThat(hit.getMatchedQueryScore("wildcard"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -328,8 +351,9 @@ public void testSpanFirstQuerySupportsName() throws InterruptedException {

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("span"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("span"));
assertThat(hit.getMatchedQueryScore("span"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand Down Expand Up @@ -363,11 +387,13 @@ public void testMatchedWithShould() throws Exception {
assertHitCount(searchResponse, 2L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("dolor"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("dolor"));
assertThat(hit.getMatchedQueryScore("dolor"), greaterThan(0f));
} else if (hit.getId().equals("2")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("elit"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("elit"));
assertThat(hit.getMatchedQueryScore("elit"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -391,7 +417,10 @@ public void testMatchedWithWrapperQuery() throws Exception {
for (QueryBuilder query : queries) {
SearchResponse searchResponse = client().prepareSearch().setQuery(query).get();
assertHitCount(searchResponse, 1L);
assertThat(searchResponse.getHits().getAt(0).getMatchedQueries()[0], equalTo("abc"));
SearchHit hit = searchResponse.getHits().getAt(0);
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("abc"));
assertThat(hit.getMatchedQueryScore("abc"), greaterThan(0f));
}
}
}
Loading

0 comments on commit 84bd2f1

Please sign in to comment.