Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-16667 #4

Open
wants to merge 38 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
9e9ae26
First changes for cache integration in ltr ranking
aruggero Feb 17, 2023
dbb2931
Added cache in sparse model scorer
aruggero Feb 22, 2023
5e3895f
Removed logSingleHit
aruggero Feb 22, 2023
5e81f2c
Removed throws error because not arisen
aruggero Feb 22, 2023
84a4646
Added the new featureVectorCache in the SolrIndexSearcher
aruggero Feb 24, 2023
d137fd1
Merge remote-tracking branch 'UpStream/main' into ltr_feature_vector_…
aruggero Feb 28, 2023
b6bdba8
Removed unuseful piece of code in scoreSingleHit
aruggero Mar 1, 2023
27ed1cc
added comments for topN parameter
aruggero Mar 1, 2023
f78fead
Removed unthrown exceptions
aruggero Mar 1, 2023
f6415a2
Fixed cache usage in LTRScoringQuery score of both sparse and dense s…
aruggero Mar 2, 2023
ca30f38
Alternative approach with only floats in the feature vector to cache …
aruggero Mar 14, 2023
0ec2533
Merge remote-tracking branch 'upstream/main' into alternative_solutio…
alessandrobenedetti Mar 15, 2023
09ec0b7
first draft
alessandrobenedetti Mar 16, 2023
01a09d3
Reversed changes not related to feature vector cache
aruggero Mar 17, 2023
f5ea825
Refactoring
aruggero Mar 17, 2023
bad9950
Adjusted tests with dense returned feature format and fixed document …
aruggero Mar 20, 2023
61740bc
Fixed another test with dense format
aruggero Mar 20, 2023
07c4bbc
moved context to featureTraversalRescorer
aruggero Mar 20, 2023
5a745bf
Reversed randomic format tests
aruggero Mar 20, 2023
c125c1b
Fixed tests with random feature format
aruggero Mar 21, 2023
6b8e01f
Added NaN check also in vector extraction for isUsed condition
aruggero Mar 21, 2023
25ea7ac
Gradlew tidy
aruggero Mar 21, 2023
b198688
Changes cache key for feature vector: features definition + efi + docId
aruggero Mar 23, 2023
62af786
Divided query part of the feature vector key from the document part
aruggero Mar 23, 2023
8c07a05
Fixed problem with fvKey. Put featureStoreName instead of features (w…
aruggero Mar 23, 2023
0433606
Added features definition in the feature vector key
aruggero Mar 23, 2023
a80baaa
added space
aruggero Mar 23, 2023
c579b02
Added test for feature vector cache and checked if enabled in LTRScor…
aruggero Mar 28, 2023
fb934d7
Removed last featureVectorCache configuration from config test files
aruggero Mar 28, 2023
d5161f6
Added documentation for new cache and sparse format
aruggero Mar 29, 2023
b020225
minor changes to start pipeline
alessandrobenedetti Apr 12, 2023
e67af0c
minor changes to start pipeline
alessandrobenedetti Apr 12, 2023
00db16b
minor changes to start pipeline
alessandrobenedetti Apr 13, 2023
bb5cdcf
refactor
alessandrobenedetti Apr 13, 2023
c9900b1
fixed test
Apr 13, 2023
1d3f326
gradlew tidy
aruggero Apr 13, 2023
f8f068e
Created a separate method for efi hash
aruggero Apr 14, 2023
a6d459f
Changed isUsed in isDefaultValue
aruggero Apr 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion solr/core/src/java/org/apache/solr/core/SolrConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,9 @@ private SolrConfig(
queryResultCacheConfig =
CacheConfig.getConfig(
this, get("query").get("queryResultCache"), "query/queryResultCache");
featureVectorCacheConfig =
CacheConfig.getConfig(
this, get("query").get("featureVectorCache"), "query/featureVectorCache");
documentCacheConfig =
CacheConfig.getConfig(this, get("query").get("documentCache"), "query/documentCache");
CacheConfig conf =
Expand Down Expand Up @@ -709,6 +712,7 @@ public SolrRequestParsers getRequestParsers() {
public final CacheConfig queryResultCacheConfig;
public final CacheConfig documentCacheConfig;
public final CacheConfig fieldValueCacheConfig;
public final CacheConfig featureVectorCacheConfig;
public final Map<String, CacheConfig> userCacheConfigs;
// SolrIndexSearcher - more...
public final boolean useFilterForSortedQuery;
Expand Down Expand Up @@ -1071,7 +1075,12 @@ public Map<String, Object> toMap(Map<String, Object> result) {
}

addCacheConfig(
m, filterCacheConfig, queryResultCacheConfig, documentCacheConfig, fieldValueCacheConfig);
m,
filterCacheConfig,
queryResultCacheConfig,
documentCacheConfig,
fieldValueCacheConfig,
featureVectorCacheConfig);
m = new LinkedHashMap<>();
result.put("requestDispatcher", m);
m.put("handleSelect", handleSelect);
Expand Down
16 changes: 16 additions & 0 deletions solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private final SolrCache<Query, DocSet> filterCache;
private final SolrCache<QueryResultKey, DocList> queryResultCache;
private final SolrCache<String, UnInvertedField> fieldValueCache;
private final SolrCache<Integer, float[]> featureVectorCache;
private final LongAdder fullSortCount = new LongAdder();
private final LongAdder skipSortCount = new LongAdder();
private final LongAdder liveDocsNaiveCacheHitCount = new LongAdder();
Expand Down Expand Up @@ -385,6 +386,11 @@ public SolrIndexSearcher(
? null
: solrConfig.queryResultCacheConfig.newInstance();
if (queryResultCache != null) clist.add(queryResultCache);
featureVectorCache =
solrConfig.featureVectorCacheConfig == null
? null
: solrConfig.featureVectorCacheConfig.newInstance();
if (featureVectorCache != null) clist.add(featureVectorCache);
SolrCache<Integer, Document> documentCache = docFetcher.getDocumentCache();
if (documentCache != null) clist.add(documentCache);

Expand All @@ -406,6 +412,7 @@ public SolrIndexSearcher(
this.filterCache = null;
this.queryResultCache = null;
this.fieldValueCache = null;
this.featureVectorCache = null;
this.cacheMap = NO_GENERIC_CACHES;
this.cacheList = NO_CACHES;
}
Expand Down Expand Up @@ -622,6 +629,10 @@ public SolrCache<Query, DocSet> getFilterCache() {
return filterCache;
}

/**
 * Returns the cache holding per-document LTR feature vectors, keyed by an {@code Integer}
 * (presumably a document-derived cache key — confirm against LTRScoringQuery's key
 * construction) with the extracted feature values as a {@code float[]}.
 *
 * @return the feature vector cache, or {@code null} if no {@code featureVectorCache} is
 *     configured in solrconfig.xml
 */
public SolrCache<Integer, float[]> getFeatureVectorCache() {
return featureVectorCache;
}

//
// Set default regenerators on filter and query caches if they don't have any
//
Expand Down Expand Up @@ -664,6 +675,11 @@ public <K, V> boolean regenerateItem(
});
}

if (solrConfig.featureVectorCacheConfig != null
&& solrConfig.featureVectorCacheConfig.getRegenerator() == null) {
solrConfig.featureVectorCacheConfig.setRegenerator(new NoOpRegenerator());
}

if (solrConfig.queryResultCacheConfig != null
&& solrConfig.queryResultCacheConfig.getRegenerator() == null) {
final int queryResultWindowSize = solrConfig.queryResultWindowSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,20 @@ public class CSVFeatureLogger extends FeatureLogger {
private final char keyValueSep;
private final char featureSep;

public CSVFeatureLogger(String fvCacheName, FeatureFormat f) {
super(fvCacheName, f);
/**
 * Creates a CSV feature logger using the default key/value and feature separators.
 *
 * @param f the format (DENSE or SPARSE) used when rendering feature vectors
 */
public CSVFeatureLogger(FeatureFormat f) {
super(f);
this.keyValueSep = DEFAULT_KEY_VALUE_SEPARATOR;
this.featureSep = DEFAULT_FEATURE_SEPARATOR;
}

public CSVFeatureLogger(String fvCacheName, FeatureFormat f, char keyValueSep, char featureSep) {
super(fvCacheName, f);
/**
 * Creates a CSV feature logger with caller-supplied separators.
 *
 * @param f the format (DENSE or SPARSE) used when rendering feature vectors
 * @param keyValueSep separator printed between a feature name and its value
 * @param featureSep separator printed between consecutive features
 */
public CSVFeatureLogger(FeatureFormat f, char keyValueSep, char featureSep) {
super(f);
this.keyValueSep = keyValueSep;
this.featureSep = featureSep;
}

@Override
public String makeFeatureVector(LTRScoringQuery.FeatureInfo[] featuresInfo) {
public String printFeatureVector(LTRScoringQuery.FeatureInfo[] featuresInfo) {
// Allocate the buffer to a size based on the number of features instead of the
// default 16. You need space for the name, value, and two separators per feature,
// but not all the features are expected to fire, so this is just a naive estimate.
Expand Down
51 changes: 2 additions & 49 deletions solr/modules/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,68 +16,21 @@
*/
package org.apache.solr.ltr;

import org.apache.solr.search.SolrIndexSearcher;

/**
* FeatureLogger can be registered in a model and provide a strategy for logging the feature values.
*/
public abstract class FeatureLogger {

/** the name of the cache using for storing the feature value */
private final String fvCacheName;

/** Output formats for a logged feature vector: all features (DENSE) or only a subset (SPARSE — exact semantics defined by the concrete logger; confirm against the logger implementations). */
public enum FeatureFormat {
DENSE,
SPARSE
};

protected final FeatureFormat featureFormat;

protected FeatureLogger(String fvCacheName, FeatureFormat f) {
this.fvCacheName = fvCacheName;
/**
 * @param f the format (DENSE or SPARSE) this logger uses when rendering feature vectors
 */
protected FeatureLogger(FeatureFormat f) {
this.featureFormat = f;
}

/**
* Log will be called every time that the model generates the feature values for a document and a
* query.
*
* @param docid Solr document id whose features we are saving
* @param featuresInfo List of all the {@link LTRScoringQuery.FeatureInfo} objects which contain
* name and value for all the features triggered by the result set
* @return true if the logger successfully logged the features, false otherwise.
*/
public boolean log(
int docid,
LTRScoringQuery scoringQuery,
SolrIndexSearcher searcher,
LTRScoringQuery.FeatureInfo[] featuresInfo) {
final String featureVector = makeFeatureVector(featuresInfo);
if (featureVector == null) {
return false;
}

if (null == searcher.cacheInsert(fvCacheName, fvCacheKey(scoringQuery, docid), featureVector)) {
return false;
}

return true;
}

public abstract String makeFeatureVector(LTRScoringQuery.FeatureInfo[] featuresInfo);

private static int fvCacheKey(LTRScoringQuery scoringQuery, int docid) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the generation of the cache key has been moved from this class, motivation?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this method is no longer invoked inside the Logger (see row 81 below), therefore I moved it where it is called (in LTRScoringQuery)

Copy link
Collaborator Author

@aruggero aruggero Apr 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also this is now called when creating the LTRScoringQuery (query part of the key) and added as a private variable in the LTRScoringQuery object

return scoringQuery.hashCode() + (31 * docid);
}

/**
* populate the document with its feature vector
*
* @param docid Solr document id
* @return String representation of the list of features calculated for docid
*/
public String getFeatureVector(
int docid, LTRScoringQuery scoringQuery, SolrIndexSearcher searcher) {
return (String) searcher.cacheLookup(fvCacheName, fvCacheKey(scoringQuery, docid));
}
public abstract String printFeatureVector(LTRScoringQuery.FeatureInfo[] featuresInfo);
}
48 changes: 7 additions & 41 deletions solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.Weight;
import org.apache.solr.ltr.interleaving.OriginalRankingLTRScoringQuery;
import org.apache.solr.search.SolrIndexSearcher;

/**
* Implements the rescoring logic. The top documents returned by solr with their original scores,
Expand Down Expand Up @@ -136,7 +135,7 @@ private ScoreDoc[] rerank(IndexSearcher searcher, int topN, ScoreDoc[] firstPass
(LTRScoringQuery.ModelWeight)
searcher.createWeight(searcher.rewrite(scoringQuery), ScoreMode.COMPLETE, 1);

scoreFeatures(searcher, topN, modelWeight, firstPassResults, leaves, reranked);
scoreFeatures(topN, modelWeight, firstPassResults, leaves, reranked);
// Must sort all documents that we reranked, and then select the top
Arrays.sort(reranked, scoreComparator);
return reranked;
Expand All @@ -151,7 +150,6 @@ protected static ScoreDoc[] getFirstPassDocsRanked(TopDocs firstPassTopDocs) {
}

public void scoreFeatures(
IndexSearcher indexSearcher,
int topN,
LTRScoringQuery.ModelWeight modelWeight,
ScoreDoc[] hits,
Expand Down Expand Up @@ -180,36 +178,13 @@ public void scoreFeatures(
docBase = readerContext.docBase;
scorer = modelWeight.scorer(readerContext);
}
if (scoreSingleHit(topN, docBase, hitUpto, hit, docID, scorer, reranked)) {
logSingleHit(indexSearcher, modelWeight, hit.doc, scoringQuery);
}
scoreSingleHit(topN, docBase, hitUpto, hit, docID, scorer, reranked);
hitUpto++;
}
}

/**
 * Logs the feature vector of a single reranked document. Call this method if the {@link
 * #scoreSingleHit(int, int, int, ScoreDoc, int,
 * org.apache.solr.ltr.LTRScoringQuery.ModelWeight.ModelScorer, ScoreDoc[])} method indicated that
 * the document's feature info should be logged.
 *
 * @param indexSearcher the searcher in use; logging only happens when this is a {@code
 *     SolrIndexSearcher}
 * @param modelWeight supplies the per-feature values ({@code getFeaturesInfo()}) to log
 * @param docid id of the document whose features are being logged
 * @param scoringQuery the LTR query that produced the features; also supplies the logger
 */
protected static void logSingleHit(
IndexSearcher indexSearcher,
LTRScoringQuery.ModelWeight modelWeight,
int docid,
LTRScoringQuery scoringQuery) {
final FeatureLogger featureLogger = scoringQuery.getFeatureLogger();
// No-op when the model has no logger attached, or when the searcher is not a
// SolrIndexSearcher (the logger's log(...) signature requires one).
if (featureLogger != null && indexSearcher instanceof SolrIndexSearcher) {
featureLogger.log(
docid, scoringQuery, (SolrIndexSearcher) indexSearcher, modelWeight.getFeaturesInfo());
}
}

/**
* Scores a single document and returns true if the document's feature info should be logged via
* the {@link #logSingleHit(IndexSearcher, org.apache.solr.ltr.LTRScoringQuery.ModelWeight, int,
* LTRScoringQuery)} method. Feature info logging is only necessary for the topN documents.
*/
protected static boolean scoreSingleHit(
/** Scores a single document. */
protected static void scoreSingleHit(
int topN,
int docBase,
int hitUpto,
Expand All @@ -230,32 +205,23 @@ protected static boolean scoreSingleHit(
scorer.docID();
scorer.iterator().advance(targetDoc);

boolean logHit = false;

scorer.getDocInfo().setOriginalDocScore(hit.score);
hit.score = scorer.score();
if (hitUpto < topN) {
reranked[hitUpto] = hit;
// if the heap is not full, maybe I want to log the features for this
// document
logHit = true;
} else if (hitUpto == topN) {
// collected topN document, I create the heap
heapify(reranked, topN);
}
if (hitUpto >= topN) {
// once that heap is ready, if the score of this document is lower that
// the minimum
// i don't want to log the feature. Otherwise I replace it with the
// minimum and fix the
// heap.
// once the heap is ready, if the score of this document is greater than
// the minimum, I replace it with the
// minimum and fix the heap.
if (hit.score > reranked[0].score) {
reranked[0] = hit;
heapAdjust(reranked, topN, 0);
logHit = true;
}
}
return logHit;
}

@Override
Expand Down
Loading