Skip to content

Commit

Permalink
Remove Scorable#docID. (#12407)
Browse files Browse the repository at this point in the history
`Scorable#docID()` exposes the document that is being collected, which makes it
impossible to bulk-collect multiple documents at once.

Relates #12358
  • Loading branch information
jpountz authored Jul 5, 2023
1 parent 9ffc625 commit f527eb3
Show file tree
Hide file tree
Showing 39 changed files with 256 additions and 453 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ API Changes

* GITHUB#12321: Reduced visibility of StringsToAutomaton. Please use Automata#makeStringUnion instead. (Greg Miller)

* GITHUB#12407: Removed Scorable#docID. (Adrien Grand)

New Features
---------------------

Expand Down
12 changes: 12 additions & 0 deletions lucene/MIGRATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ Lucene 9.2 or stay with 9.0.

See LUCENE-10558 for more details and workarounds.

### Removed Scorable#docID() (GITHUB#12407)

This method has been removed in order to enable more search-time optimizations.
Use the doc ID passed to `LeafCollector#collect` to know which doc ID is being
collected.

### ScoreCachingWrappingScorer now wraps a LeafCollector instead of a Scorable (GITHUB#12407)

In order to adapt to the removal of `Scorable#docID()`,
`ScoreCachingWrappingScorer` now wraps a `LeafCollector` rather than a
`Scorable`.

## Migration from Lucene 8.x to Lucene 9.0

### Rename of binary artifacts from '**-analyzers-**' to '**-analysis-**' (LUCENE-9562)
Expand Down
12 changes: 5 additions & 7 deletions lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ public BulkScorerAndDoc get(int i) {
final BulkScorerAndDoc[] leads;
final HeadPriorityQueue head;
final TailPriorityQueue tail;
final ScoreAndDoc scoreAndDoc = new ScoreAndDoc();
final Score score = new Score();
final int minShouldMatch;
final long cost;

Expand Down Expand Up @@ -181,12 +181,11 @@ public long cost() {
}

private void scoreDocument(LeafCollector collector, int base, int i) throws IOException {
final ScoreAndDoc scoreAndDoc = this.scoreAndDoc;
final Score score = this.score;
final Bucket bucket = buckets[i];
if (bucket.freq >= minShouldMatch) {
scoreAndDoc.score = (float) bucket.score;
score.score = (float) bucket.score;
final int doc = base | i;
scoreAndDoc.doc = doc;
collector.collect(doc);
}
bucket.freq = 0;
Expand Down Expand Up @@ -300,7 +299,7 @@ private void scoreWindowSingleScorer(
bulkScorer.score(collector, acceptDocs, windowMin, end);

// reset the scorer that should be used for the general case
collector.setScorer(scoreAndDoc);
collector.setScorer(score);
}

private BulkScorerAndDoc scoreWindow(
Expand Down Expand Up @@ -332,8 +331,7 @@ private BulkScorerAndDoc scoreWindow(

@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
scoreAndDoc.doc = -1;
collector.setScorer(scoreAndDoc);
collector.setScorer(score);

BulkScorerAndDoc top = advance(min);
while (top.next < max) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ public int score(final LeafCollector collector, Bits acceptDocs, int min, int ma
throws IOException {
final LeafCollector noScoreCollector =
new LeafCollector() {
ScoreAndDoc fake = new ScoreAndDoc();
Score fake = new Score();

@Override
public void setScorer(Scorable scorer) throws IOException {
Expand All @@ -173,7 +173,6 @@ public void setScorer(Scorable scorer) throws IOException {

@Override
public void collect(int doc) throws IOException {
fake.doc = doc;
collector.collect(doc);
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,12 @@ private static final class CachedScorable extends Scorable {
// the outer class does not incur access check by the JVM. The same
// situation would be if they were defined in the outer class as private
// members.
int doc;
float score;

@Override
public final float score() {
return score;
}

@Override
public int docID() {
return doc;
}
}

private static class NoScoreCachingCollector extends CachingCollector {
Expand Down Expand Up @@ -155,9 +149,8 @@ protected void collect(LeafCollector collector, int i) throws IOException {
final CachedScorable scorer = new CachedScorable();
collector.setScorer(scorer);
for (int j = 0; j < docs.length; ++j) {
scorer.doc = docs[j];
scorer.score = scores[j];
collector.collect(scorer.doc);
collector.collect(docs[j]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,9 @@ public String toString() {
* Returns a DoubleValues instance that wraps scores returned by a Scorer.
*
* <p>Note: If you intend to call {@link Scorable#score()} on the provided {@code scorer}
* separately, you may want to consider wrapping it with {@link
* ScoreCachingWrappingScorer#wrap(Scorable)} to avoid computing the actual score multiple times.
* separately, you may want to consider wrapping the collector with {@link
* ScoreCachingWrappingScorer#wrap(LeafCollector)} to avoid computing the actual score multiple
* times.
*/
public static DoubleValues fromScorer(Scorable scorer) {
return new DoubleValues() {
Expand All @@ -340,7 +341,6 @@ public double doubleValue() throws IOException {

@Override
public boolean advanceExact(int doc) throws IOException {
assert scorer.docID() == doc;
return true;
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,7 @@ public void setTopValue(Float value) {

@Override
public void setScorer(Scorable scorer) {
// wrap with a ScoreCachingWrappingScorer so that successive calls to
// score() will not incur score computation over and
// over again.
this.scorer = ScoreCachingWrappingScorer.wrap(scorer);
this.scorer = scorer;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,6 @@ public float score() throws IOException {
return in.score();
}

@Override
public int docID() {
return in.docID();
}

@Override
public Collection<ChildScorable> getChildren() throws IOException {
return Collections.singletonList(new ChildScorable(in, "FILTER"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,10 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
throws IOException {
max = Math.min(max, maxDoc);
ScoreAndDoc scorer = new ScoreAndDoc();
Score scorer = new Score();
scorer.score = score;
collector.setScorer(scorer);
for (int doc = min; doc < max; ++doc) {
scorer.doc = doc;
if (acceptDocs == null || acceptDocs.get(doc)) {
collector.collect(doc);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
private final long cost;
private float minCompetitiveScore;
private boolean minCompetitiveScoreUpdated;
private ScoreAndDoc scorable = new ScoreAndDoc();
private Score scorable = new Score();
private final double[] maxScoreSums;

MaxScoreBulkScorer(int maxDoc, List<Scorer> scorers) throws IOException {
Expand Down Expand Up @@ -103,7 +103,6 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
}

if (possibleMatch) {
scorable.doc = top.doc;
scorable.score = (float) score;
collector.collect(top.doc);
}
Expand Down Expand Up @@ -203,15 +202,9 @@ public long cost() {
return cost;
}

private class ScoreAndDoc extends Scorable {
private class Score extends Scorable {

float score;
int doc = -1;

@Override
public int docID() {
return doc;
}

@Override
public float score() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,12 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
&& (scoreMode() == ScoreMode.TOP_SCORES || leafScoreMode != ScoreMode.TOP_SCORES)) {
return leafCollectors.get(0);
}
return new MultiLeafCollector(
leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
LeafCollector collector =
new MultiLeafCollector(leafCollectors, scoreMode() == ScoreMode.TOP_SCORES);
if (cacheScores) {
collector = ScoreCachingWrappingScorer.wrap(collector);
}
return collector;
}
}

Expand All @@ -169,24 +173,18 @@ public Collector[] getCollectors() {

private static class MultiLeafCollector implements LeafCollector {

private final boolean cacheScores;
private final LeafCollector[] collectors;
private final float[] minScores;
private final boolean skipNonCompetitiveScores;

private MultiLeafCollector(
List<LeafCollector> collectors, boolean cacheScores, boolean skipNonCompetitive) {
private MultiLeafCollector(List<LeafCollector> collectors, boolean skipNonCompetitive) {
this.collectors = collectors.toArray(new LeafCollector[collectors.size()]);
this.cacheScores = cacheScores;
this.skipNonCompetitiveScores = skipNonCompetitive;
this.minScores = this.skipNonCompetitiveScores ? new float[this.collectors.length] : null;
}

@Override
public void setScorer(Scorable scorer) throws IOException {
if (cacheScores) {
scorer = ScoreCachingWrappingScorer.wrap(scorer);
}
if (skipNonCompetitiveScores) {
for (int i = 0; i < collectors.length; ++i) {
final LeafCollector c = collectors[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,23 @@ public PositiveScoresOnlyCollector(Collector in) {

@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
return new FilterLeafCollector(super.getLeafCollector(context)) {
return ScoreCachingWrappingScorer.wrap(
new FilterLeafCollector(super.getLeafCollector(context)) {

private Scorable scorer;
private Scorable scorer;

@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = ScoreCachingWrappingScorer.wrap(scorer);
in.setScorer(this.scorer);
}
@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
in.setScorer(scorer);
}

@Override
public void collect(int doc) throws IOException {
if (scorer.score() > 0) {
in.collect(doc);
}
}
};
@Override
public void collect(int doc) throws IOException {
if (scorer.score() > 0) {
in.collect(doc);
}
}
});
}
}
3 changes: 0 additions & 3 deletions lucene/core/src/java/org/apache/lucene/search/Scorable.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,6 @@ public float smoothingScore(int docId) throws IOException {
return 0f;
}

/** Returns the doc ID that is currently being scored. */
public abstract int docID();

/**
* Optional method: Tell the scorer that its iterator may safely ignore all documents whose score
* is less than the given {@code minScore}. This is a no-op by default.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,8 @@
* Used by {@link BulkScorer}s that need to pass a {@link Scorable} to {@link
* LeafCollector#setScorer}.
*/
final class ScoreAndDoc extends Scorable {
final class Score extends Scorable {
float score;
int doc = -1;

@Override
public int docID() {
return doc;
}

@Override
public float score() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,48 @@
*/
public final class ScoreCachingWrappingScorer extends Scorable {

private int lastDoc = -1;
private int curDoc = -1;
private float curScore;
private final Scorable in;

/**
* Wraps the provided {@link Scorable} unless it's already an instance of {@code
* ScoreCachingWrappingScorer}, in which case it will just return the provided instance.
*
* @param scorer Underlying {@code Scorable} to wrap
* @return Instance of {@code ScoreCachingWrappingScorer} wrapping the underlying {@code scorer}
* Wrap the provided {@link LeafCollector} so that scores are computed lazily and cached if
* accessed multiple times.
*/
public static Scorable wrap(Scorable scorer) {
if (scorer instanceof ScoreCachingWrappingScorer) {
return scorer;
public static LeafCollector wrap(LeafCollector collector) {
if (collector instanceof ScoreCachingWrappingLeafCollector) {
return collector;
}
return new ScoreCachingWrappingLeafCollector(collector);
}

private static class ScoreCachingWrappingLeafCollector extends FilterLeafCollector {

ScoreCachingWrappingLeafCollector(LeafCollector in) {
super(in);
}

private ScoreCachingWrappingScorer scorer;

@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = new ScoreCachingWrappingScorer(scorer);
super.setScorer(this.scorer);
}

@Override
public void collect(int doc) throws IOException {
if (scorer != null) {
scorer.curDoc = doc;
}
super.collect(doc);
}

@Override
public DocIdSetIterator competitiveIterator() throws IOException {
return in.competitiveIterator();
}
return new ScoreCachingWrappingScorer(scorer);
}

/** Creates a new instance by wrapping the given scorer. */
Expand All @@ -56,10 +82,9 @@ private ScoreCachingWrappingScorer(Scorable scorer) {

@Override
public float score() throws IOException {
int doc = in.docID();
if (doc != curDoc) {
if (lastDoc != curDoc) {
curScore = in.score();
curDoc = doc;
curDoc = lastDoc;
}

return curScore;
Expand All @@ -70,11 +95,6 @@ public void setMinCompetitiveScore(float minScore) throws IOException {
in.setMinCompetitiveScore(minScore);
}

@Override
public int docID() {
return in.docID();
}

@Override
public Collection<ChildScorable> getChildren() {
return Collections.singleton(new ChildScorable(in, "CACHED"));
Expand Down
Loading

0 comments on commit f527eb3

Please sign in to comment.