Skip to content

Commit

Permalink
Support hits to log count (#789)
Browse files Browse the repository at this point in the history
* Support hits to log count

* Addressed PR comments

* Fixed hitsToLog with startHit

* Renamed method

* Fixed hitsToLog with startHit

* Added more unit tests

* Fixed formatting

* Changed doc_id to start from 1
  • Loading branch information
fragosoluana authored Jan 6, 2025
1 parent a69a73a commit 3472026
Show file tree
Hide file tree
Showing 7 changed files with 361 additions and 41 deletions.
3 changes: 3 additions & 0 deletions clientlib/src/main/proto/yelp/nrtsearch/search.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,9 @@ message LoggingHits {
string name = 1;
// Optional logging parameters
google.protobuf.Struct params = 2;
// Maximum number of hits to log. Fewer hits are logged when the query matches fewer
// documents than this number.
int32 hitsToLog = 3;
}

// Specify how to highlight matched text in SearchRequest
Expand Down
36 changes: 31 additions & 5 deletions src/main/java/com/yelp/nrtsearch/server/handler/SearchHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -271,14 +271,26 @@ public SearchResponse handle(IndexState indexState, SearchRequest searchRequest)

long t0 = System.nanoTime();

hits = getHitsFromOffset(hits, searchContext.getStartHit(), searchContext.getTopHits());
hits =
getHitsFromOffset(
hits,
searchContext.getStartHit(),
Math.max(
searchContext.getTopHits(),
searchContext.getHitsToLog() + searchContext.getStartHit()));

// create Hit.Builder for each hit, and populate with lucene doc id and ranking info
setResponseHits(searchContext, hits);

// fill Hit.Builder with requested fields
fetchFields(searchContext);

// if there were extra hits for the logging, the response size needs to be reduced to match
// the topHits
if (searchContext.getFetchTasks().getHitsLoggerFetchTask() != null) {
setResponseTopHits(searchContext);
}

SearchState.Builder searchState = SearchState.newBuilder();
searchContext.getResponseBuilder().setSearchState(searchState);
searchState.setTimestamp(searchContext.getTimestampSec());
Expand Down Expand Up @@ -491,17 +503,17 @@ private void fetchFields(SearchContext searchContext)

/**
* Given all the top documents, produce a slice of the documents starting from a start offset and
* going up to the query needed maximum hits. There may be more top docs than the topHits limit,
* going up to the query needed maximum hits. There may be more top docs than the hitsCount limit,
* if top docs sampling facets are used.
*
* @param hits all hits
* @param startHit offset into top docs
* @param topHits maximum number of hits needed for search response
* @param hitsCount maximum number of hits needed for the query
* @return slice of hits starting at given offset, or empty slice if there are fewer than startHit
* docs
*/
public static TopDocs getHitsFromOffset(TopDocs hits, int startHit, int topHits) {
int retrieveHits = Math.min(topHits, hits.scoreDocs.length);
public static TopDocs getHitsFromOffset(TopDocs hits, int startHit, int hitsCount) {
int retrieveHits = Math.min(hitsCount, hits.scoreDocs.length);
if (startHit != 0 || retrieveHits != hits.scoreDocs.length) {
// Slice:
int count = Math.max(0, retrieveHits - startHit);
Expand All @@ -514,6 +526,20 @@ public static TopDocs getHitsFromOffset(TopDocs hits, int startHit, int topHits)
return hits;
}

/**
 * Reduce response size by removing any extra hits that were only fetched for logging. The final
 * search response should contain at most {@code topHits - startHit} hits.
 *
 * <p>The target size is clamped at zero. When {@code startHit} is greater than {@code topHits}
 * the difference is negative; without the clamp the loop would keep going after the last hit was
 * removed and call {@code removeHits(-1)}, which fails with an index out of bounds error.
 *
 * @param context search context whose response builder is trimmed in place
 */
private static void setResponseTopHits(SearchContext context) {
  int maxResponseHits = Math.max(0, context.getTopHits() - context.getStartHit());
  // Remove from the tail so the remaining hits keep their order and indices.
  for (int lastIdx = context.getResponseBuilder().getHitsCount() - 1;
      lastIdx >= maxResponseHits;
      lastIdx--) {
    context.getResponseBuilder().removeHits(lastIdx);
  }
}

/**
* Add {@link com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.Builder}s to the context {@link
* SearchResponse.Builder} for each of the query hits. Populate the builders with the lucene doc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
public class HitsLoggerFetchTask implements FetchTask {
// Divisor used to convert elapsed nanoseconds into milliseconds.
private static final double TEN_TO_THE_POWER_SIX = Math.pow(10, 6);
// Logger that receives the hits in processAllHits.
private final HitsLogger hitsLogger;
// Number of hits to log, read from the LoggingHits message in the constructor.
private final int hitsToLog;
// Accumulated time spent in processAllHits, in milliseconds.
private final DoubleAdder timeTakenMs = new DoubleAdder();

/**
 * Build the fetch task from the logging section of a search request.
 *
 * @param loggingHits logging configuration; used to create the {@link HitsLogger} and to read
 *     the number of hits to log
 */
public HitsLoggerFetchTask(LoggingHits loggingHits) {
  HitsLoggerCreator loggerCreator = HitsLoggerCreator.getInstance();
  hitsLogger = loggerCreator.createHitsLogger(loggingHits);
  hitsToLog = loggingHits.getHitsToLog();
}

/**
Expand All @@ -46,7 +48,15 @@ public HitsLoggerFetchTask(LoggingHits loggingHits) {
@Override
public void processAllHits(SearchContext searchContext, List<SearchResponse.Hit.Builder> hits) {
  long startTime = System.nanoTime();

  // The hits list may hold more entries than should be logged, so cap it at hitsToLog.
  // The logger is invoked exactly once; logging the full list and then the capped list would
  // record every hit twice.
  int maxHitsToLog = searchContext.getHitsToLog();
  List<SearchResponse.Hit.Builder> loggableHits =
      maxHitsToLog < hits.size() ? hits.subList(0, maxHitsToLog) : hits;
  hitsLogger.log(searchContext, loggableHits);

  // Elapsed nanoseconds divided by 10^6 gives milliseconds.
  timeTakenMs.add((System.nanoTime() - startTime) / TEN_TO_THE_POWER_SIX);
}

Expand All @@ -58,4 +68,13 @@ public void processAllHits(SearchContext searchContext, List<SearchResponse.Hit.
public double getTimeTakenMs() {
  // DoubleAdder#sum returns the same value as doubleValue().
  final double accumulatedMs = timeTakenMs.sum();
  return accumulatedMs;
}

/**
 * Report how many hits this task is configured to log.
 *
 * @return the hitsToLog value read from the LoggingHits message at construction
 */
public int getHitsToLog() {
  return this.hitsToLog;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,14 @@ public int getTopHits() {
return topHits;
}

/**
 * Get the number of hits to log.
 *
 * @return the hits-to-log count of the hits logger fetch task, or 0 when no such task is set
 */
public int getHitsToLog() {
  if (getFetchTasks().getHitsLoggerFetchTask() == null) {
    return 0;
  }
  return getFetchTasks().getHitsLoggerFetchTask().getHitsToLog();
}

/**
* Get map of all fields usable for this query. This includes all fields defined in the index and
* dynamic fields from the request.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,12 @@ public DocCollector(
}

public static int computeNumHitsToCollect(SearchRequest request) {
// determine how many hits to collect based on request, facets and rescore window
// determine how many hits to collect based on request, facets, rescore window and hits to log
int collectHits = request.getTopHits();
if (request.hasLoggingHits()) {
collectHits =
Math.max(collectHits, request.getLoggingHits().getHitsToLog() + request.getStartHit());
}
for (Facet facet : request.getFacetsList()) {
int facetSample = facet.getSampleTopDocs();
if (facetSample > 0 && facetSample > collectHits) {
Expand Down
Loading

0 comments on commit 3472026

Please sign in to comment.