Skip to content

Commit

Permalink
LUCENE-10458: BoundedDocSetIdIterator may supply an incorrect count in Weigh…
Browse files Browse the repository at this point in the history
…t#count(LeafReaderContext) when missingValue is enabled (#736)
  • Loading branch information
LuXugang authored Mar 23, 2022
1 parent 1c6f631 commit 5450d72
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 27 deletions.
8 changes: 5 additions & 3 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ API Changes

New Features
---------------------

* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
to speed up computing the number of hits when possible. (Lu Xugang, Luca Cavanna, Adrien Grand)

* LUCENE-10422: Monitor Improvements: `Monitor` can use a custom `Directory`
implementation. `Monitor` can be created with a readonly `QueryIndex` in order to
have readonly `Monitor` instances. (Niko Usai)
Expand All @@ -66,6 +70,7 @@ Improvements

Optimizations
---------------------

* LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead (Peter Gromov)

* LUCENE-10451: Hunspell: don't perform potentially expensive spellchecking after timeout (Peter Gromov)
Expand Down Expand Up @@ -195,9 +200,6 @@ New Features
based on TotalHitCountCollector that allows users to parallelize counting the
number of hits. (Luca Cavanna, Adrien Grand)

* LUCENE-10385: Implement Weight#count on IndexSortSortedNumericDocValuesRangeQuery
to speed up computing the number of hits when possible. (Luca Cavanna, Adrien Grand)

* LUCENE-10403: Add ArrayUtil#grow(T[]). (Greg Miller)

* LUCENE-10414: Add fn:fuzzyTerm interval function to flexible query parser (Dawid Weiss,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
Expand Down Expand Up @@ -198,16 +200,18 @@ public boolean isCacheable(LeafReaderContext ctx) {

@Override
public int count(LeafReaderContext context) throws IOException {
BoundedDocSetIdIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null) {
return disi.lastDoc - disi.firstDoc;
if (context.reader().hasDeletions() == false) {
BoundedDocIdSetIterator disi = getDocIdSetIteratorOrNull(context);
if (disi != null && disi.delegate == null) {
return disi.lastDoc - disi.firstDoc;
}
}
return fallbackWeight.count(context);
}
};
}

private BoundedDocSetIdIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
private BoundedDocIdSetIterator getDocIdSetIteratorOrNull(LeafReaderContext context)
throws IOException {
SortedNumericDocValues sortedNumericValues =
DocValues.getSortedNumeric(context.reader(), field);
Expand Down Expand Up @@ -237,7 +241,7 @@ private BoundedDocSetIdIterator getDocIdSetIteratorOrNull(LeafReaderContext cont
* {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with
* no value.
*/
private BoundedDocSetIdIterator getDocIdSetIterator(
private BoundedDocIdSetIterator getDocIdSetIterator(
SortField sortField, LeafReaderContext context, DocIdSetIterator delegate)
throws IOException {
long lower = sortField.getReverse() ? upperValue : lowerValue;
Expand Down Expand Up @@ -278,7 +282,19 @@ private BoundedDocSetIdIterator getDocIdSetIterator(
}

int lastDocIdExclusive = high + 1;
return new BoundedDocSetIdIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
Object missingValue = sortField.getMissingValue();
BoundedDocIdSetIterator disi;
LeafReader reader = context.reader();
PointValues pointValues = reader.getPointValues(field);
final long missingLongValue = missingValue == null ? 0L : (long) missingValue;
// all documents have docValues or missing value falls outside the range
if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc())
|| (missingLongValue < lowerValue || missingLongValue > upperValue)) {
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, null);
} else {
disi = new BoundedDocIdSetIterator(firstDocIdInclusive, lastDocIdExclusive, delegate);
}
return disi;
}

/** Compares the given document's value with a stored reference value. */
Expand Down Expand Up @@ -306,14 +322,14 @@ private static ValueComparator loadComparator(
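* A doc ID set iterator that only returns doc IDs in the range [firstDoc, lastDoc). When a
* delegate iterator is supplied, only doc IDs the delegate advances to are returned; when the
* delegate is null, the iterator advances straight to the requested target within the range.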
*/
private static class BoundedDocSetIdIterator extends DocIdSetIterator {
private static class BoundedDocIdSetIterator extends DocIdSetIterator {
private final int firstDoc;
private final int lastDoc;
private final DocIdSetIterator delegate;

private int docID = -1;

BoundedDocSetIdIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) {
this.firstDoc = firstDoc;
this.lastDoc = lastDoc;
this.delegate = delegate;
Expand All @@ -335,7 +351,12 @@ public int advance(int target) throws IOException {
target = firstDoc;
}

int result = delegate.advance(target);
int result;
if (delegate != null) {
result = delegate.advance(target);
} else {
result = target;
}
if (result < lastDoc) {
docID = result;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
Expand Down Expand Up @@ -59,7 +61,14 @@ public void testSameHitsAsPointRangeQuery() throws IOException {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
boolean reverse = random().nextBoolean();
SortField sortField = new SortedNumericSortField("dv", SortField.Type.LONG, reverse);
sortField.setMissingValue(random().nextLong());
boolean enableMissingValue = random().nextBoolean();
if (enableMissingValue) {
long missingValue =
random().nextBoolean()
? TestUtil.nextLong(random(), -100, 10000)
: (random().nextBoolean() ? Long.MIN_VALUE : Long.MAX_VALUE);
sortField.setMissingValue(missingValue);
}
iwc.setIndexSort(new Sort(sortField));

RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Expand Down Expand Up @@ -459,7 +468,7 @@ public void testIndexSortOptimizationDeactivated(RandomIndexWriter writer) throw
reader.close();
}

public void testCount() throws IOException {
public void testFallbackCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
Expand All @@ -471,44 +480,133 @@ public void testCount() throws IOException {
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

Query fallbackQuery = LongPoint.newRangeQuery("field", 1, 42);
Query query = new IndexSortSortedNumericDocValuesRangeQuery("field", 1, 42, fallbackQuery);
// we use an unrealistic query that exposes its own Weight#count
Query fallbackQuery = new MatchNoDocsQuery();
// the index is not sorted on this field, the fallback query is used
Query query = new IndexSortSortedNumericDocValuesRangeQuery("another", 1, 42, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertEquals(1, weight.count(context));
assertEquals(0, weight.count(context));
}

writer.close();
reader.close();
dir.close();
}

public void testFallbackCount() throws IOException {
/**
 * Builds several randomly populated indexes sorted on "field" and checks, for many random
 * ranges, that {@link IndexSortSortedNumericDocValuesRangeQuery} reports the same per-leaf count
 * as the equivalent {@link LongPoint} range query.
 */
public void testCompareCount() throws IOException {
  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; ++round) {
    Directory directory = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
    SortField sort = new SortedNumericSortField("field", SortField.Type.LONG);
    // Randomly either leave the missing value unset, or pick one that is an ordinary value or
    // one of the two extremes of the long range.
    if (random().nextBoolean()) {
      final long missing;
      if (random().nextBoolean()) {
        missing = TestUtil.nextLong(random(), -100, 10000);
      } else {
        missing = random().nextBoolean() ? Long.MIN_VALUE : Long.MAX_VALUE;
      }
      sort.setMissingValue(missing);
    }
    config.setIndexSort(new Sort(sort));

    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

    final int docCount = atLeast(100);
    for (int added = 0; added < docCount; ++added) {
      // A document either carries one doc value plus point, or is left empty so that it has no
      // value for the sort field.
      final int valueCount = TestUtil.nextInt(random(), 0, 1);
      final Document doc =
          valueCount > 0
              ? createSNDVAndPointDocument("field", TestUtil.nextLong(random(), -100, 10000))
              : new Document();
      indexWriter.addDocument(doc);
    }

    // Randomly delete the documents whose value lies in [0, 10].
    if (random().nextBoolean()) {
      indexWriter.deleteDocuments(LongPoint.newRangeQuery("field", 0L, 10L));
    }

    final IndexReader indexReader = indexWriter.getReader();
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    indexWriter.close();

    for (int attempt = 0; attempt < 100; ++attempt) {
      // Each bound is either open-ended or a random value; lower may end up above upper.
      final long lower;
      if (random().nextBoolean()) {
        lower = Long.MIN_VALUE;
      } else {
        lower = TestUtil.nextLong(random(), -100, 10000);
      }
      final long upper;
      if (random().nextBoolean()) {
        upper = Long.MAX_VALUE;
      } else {
        upper = TestUtil.nextLong(random(), -100, 10000);
      }

      final Query pointQuery = LongPoint.newRangeQuery("field", lower, upper);
      final Query fallback = LongPoint.newRangeQuery("field", lower, upper);
      final Query sortedQuery =
          new IndexSortSortedNumericDocValuesRangeQuery("field", lower, upper, fallback);

      final Weight expected = pointQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 1.0f);
      final Weight actual = sortedQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 1.0f);
      assertSameCount(expected, actual, indexSearcher);
    }

    indexReader.close();
    directory.close();
  }
}

/**
 * Asserts that both weights report the same count on every leaf of the searcher.
 *
 * @param weight1 weight whose per-leaf count is the expected value
 * @param weight2 weight whose per-leaf count is compared against {@code weight1}
 * @param searcher searcher supplying the leaf contexts to iterate over
 * @throws IOException if either weight fails while counting
 */
private void assertSameCount(Weight weight1, Weight weight2, IndexSearcher searcher)
    throws IOException {
  for (LeafReaderContext leaf : searcher.getLeafContexts()) {
    final int expectedCount = weight1.count(leaf);
    final int actualCount = weight2.count(leaf);
    assertEquals(expectedCount, actualCount);
  }
}

public void testCountBoundary() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortedNumericSortField("field", SortField.Type.LONG));
SortField sortField = new SortedNumericSortField("field", SortField.Type.LONG);
boolean useLower = random().nextBoolean();
long lowerValue = 1;
long upperValue = 100;
sortField.setMissingValue(useLower ? lowerValue : upperValue);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 10));
writer.addDocument(doc);

writer.addDocument(
createSNDVAndPointDocument("field", random().nextLong(lowerValue, upperValue)));
writer.addDocument(
createSNDVAndPointDocument("field", random().nextLong(lowerValue, upperValue)));
// missingValue
writer.addDocument(createMissingValueDocument());

IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);

// we use an unrealistic query that exposes its own Weight#count
Query fallbackQuery = new MatchNoDocsQuery();
// the index is not sorted on this field, the fallback query is used
Query query = new IndexSortSortedNumericDocValuesRangeQuery("another", 1, 42, fallbackQuery);
Query fallbackQuery = LongPoint.newRangeQuery("field", lowerValue, upperValue);
Query query =
new IndexSortSortedNumericDocValuesRangeQuery(
"field", lowerValue, upperValue, fallbackQuery);
Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f);
for (LeafReaderContext context : searcher.getLeafContexts()) {
assertEquals(0, weight.count(context));
assertEquals(2, weight.count(context));
}

writer.close();
reader.close();
dir.close();
}

/**
 * Creates a document whose only field is the stored string field {@code foo=fox}; it carries no
 * doc value or point.
 */
private Document createMissingValueDocument() {
  final Document withoutValue = new Document();
  final Field marker = new StringField("foo", "fox", Field.Store.YES);
  withoutValue.add(marker);
  return withoutValue;
}

/**
 * Creates a document that indexes {@code value} under {@code field} both as a sorted numeric doc
 * value and as a long point.
 *
 * @param field name shared by the doc-values field and the point field
 * @param value the value stored in both fields
 * @return a new document holding the two fields
 */
private Document createSNDVAndPointDocument(String field, long value) {
  final SortedNumericDocValuesField docValuesField = new SortedNumericDocValuesField(field, value);
  final LongPoint pointField = new LongPoint(field, value);
  final Document result = new Document();
  result.add(docValuesField);
  result.add(pointField);
  return result;
}

private Document createDocument(String field, long value) {
Document doc = new Document();
doc.add(new SortedNumericDocValuesField(field, value));
Expand Down

0 comments on commit 5450d72

Please sign in to comment.