diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c11c77528f3e..e739dbcbe3f2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -134,6 +134,9 @@ Optimizations * GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X) +* GITHUB#13149: Made PointRangeQuery faster, for some segment sizes, by reducing the amount of virtual calls to + IntersectVisitor::visit(int). (Anton Hägerstrand) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java index 2bc2173560bd..14809d39bf4d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java @@ -31,6 +31,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; +import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.bkd.BKDConfig; /** @@ -288,7 +289,7 @@ public interface IntersectVisitor { void visit(int docID) throws IOException; /** - * Similar to {@link IntersectVisitor#visit(int)}, but a bulk visit and implements may have + * Similar to {@link IntersectVisitor#visit(int)}, but a bulk visit and implementations may have * their optimizations. */ default void visit(DocIdSetIterator iterator) throws IOException { @@ -298,6 +299,17 @@ default void visit(DocIdSetIterator iterator) throws IOException { } } + /** + * Similar to {@link IntersectVisitor#visit(int)}, but a bulk visit and implements may have + * their optimizations. Even if the implementation does the same thing this method, this may be + * a speed improvement due to fewer virtual calls. + */ + default void visit(IntsRef ref) throws IOException { + for (int i = ref.offset; i < ref.length + ref.offset; i++) { + visit(ref.ints[i]); + } + } + /** * Called for all documents in a leaf cell that crosses the query. The consumer should * scrutinize the packedValue to decide whether to accept it. In the 1D case, values are visited diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 64fb00bdb347..ad5f7bd9a049 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IntsRef; /** * Abstract class for range queries against single or multidimensional points such as {@link @@ -185,6 +186,13 @@ public void visit(DocIdSetIterator iterator) throws IOException { adder.add(iterator); } + @Override + public void visit(IntsRef ref) { + for (int i = ref.offset; i < ref.offset + ref.length; i++) { + adder.add(ref.ints[i]); + } + } + @Override public void visit(int docID, byte[] packedValue) { if (matches(packedValue)) { @@ -222,6 +230,14 @@ public void visit(DocIdSetIterator iterator) throws IOException { cost[0] = Math.max(0, cost[0] - iterator.cost()); } + @Override + public void visit(IntsRef ref) { + for (int i = ref.offset; i < ref.offset + ref.length; i++) { + result.clear(ref.ints[i]); + } + cost[0] -= ref.length; + } + @Override public void visit(int docID, byte[] packedValue) { if (matches(packedValue) == false) { diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 69818b337f21..9f6a10b9ddcf 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -23,6 +23,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.DocBaseBitSetIterator; import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IntsRef; final class DocIdsWriter { @@ -36,6 +37,22 @@ final class DocIdsWriter { private final int[] scratch; + /** + * IntsRef to be used to iterate over the scratch buffer. A single instance is reused to avoid + * re-allocating the object. The ints and length fields need to be reset each use. + * + *
The main reason for existing is to be able to call the {@link + * IntersectVisitor#visit(IntsRef)} method rather than the {@link IntersectVisitor#visit(int)} + * method. This seems to make a difference in performance, probably due to fewer virtual calls + * then happening (once per read call rather than once per doc). + */ + private final IntsRef scratchIntsRef = new IntsRef(); + + { + // This is here to not rely on the default constructor of IntsRef to set offset to 0 + scratchIntsRef.offset = 0; + } + DocIdsWriter(int maxPointsInLeaf) { scratch = new int[maxPointsInLeaf]; } @@ -318,9 +335,9 @@ private static void readLegacyDeltaVInts(IndexInput in, int count, IntersectVisi private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) throws IOException { readDelta16(in, count, scratch); - for (int i = 0; i < count; i++) { - visitor.visit(scratch[i]); - } + scratchIntsRef.ints = scratch; + scratchIntsRef.length = count; + visitor.visit(scratchIntsRef); } private static void readInts24(IndexInput in, int count, IntersectVisitor visitor) @@ -346,8 +363,8 @@ private static void readInts24(IndexInput in, int count, IntersectVisitor visito private void readInts32(IndexInput in, int count, IntersectVisitor visitor) throws IOException { in.readInts(scratch, 0, count); - for (int i = 0; i < count; i++) { - visitor.visit(scratch[i]); - } + scratchIntsRef.ints = scratch; + scratchIntsRef.length = count; + visitor.visit(scratchIntsRef); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index 93646bfafa78..0ca2d177b09f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -374,6 +374,10 @@ public void testRandomLongsMedium() throws Exception { doTestRandomLongs(1000); } + public void testRandomLongsBig() throws Exception { + doTestRandomLongs(20_000); + } + private void doTestRandomLongs(int count) throws Exception { int numValues = TestUtil.nextInt(random(), count, count * 2); @@ -434,7 +438,13 @@ private static void verifyLongs(long[] values, int[] ids) throws Exception { dir = newMaybeVirusCheckingDirectory(); } - int missingPct = random().nextInt(100); + /* + The point range query chooses only considers using an inverse BKD visitor if + there is exactly one value per document. If any document misses a value that + code is not exercised. Using a nextBoolean() here increases the likelihood + that there is no missing values, making the test more likely to test that code. + */ + int missingPct = random().nextBoolean() ? 0 : random().nextInt(100); int deletedPct = random().nextInt(100); if (VERBOSE) { System.out.println(" missingPct=" + missingPct);