From 60b7d380d65185748ea252d92a3b6f31d7bbf47a Mon Sep 17 00:00:00 2001 From: gf2121 <52390227+gf2121@users.noreply.github.com> Date: Tue, 13 Jun 2023 14:24:26 +0800 Subject: [PATCH] Speed up IndexedDISI Sparse #AdvanceExactWithinBlock for tiny step advance (#12324) --- lucene/CHANGES.txt | 2 ++ .../lucene/codecs/lucene90/IndexedDISI.java | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 703b051af256..669cddea52f7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -64,6 +64,8 @@ Improvements Optimizations --------------------- +* GITHUB#12324: Speed up sparse block advanceExact with tiny step in IndexedDISI. (Guo Feng) + * GITHUB#12270 Don't generate stacktrace in CollectionTerminatedException. (Armin Braun) * GITHUB#12160: Concurrent rewrite for AbstractKnnVectorQuery. (Kaival Parikh) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java index 205892d2fe9f..512ab4b1e556 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java @@ -110,12 +110,12 @@ public final class IndexedDISI extends DocIdSetIterator { private static void flush( int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) throws IOException { - assert block >= 0 && block < 65536; + assert block >= 0 && block < BLOCK_SIZE; out.writeShort((short) block); - assert cardinality > 0 && cardinality <= 65536; + assert cardinality > 0 && cardinality <= BLOCK_SIZE; out.writeShort((short) (cardinality - 1)); if (cardinality > MAX_ARRAY_LENGTH) { - if (cardinality != 65536) { // all docs are set + if (cardinality != BLOCK_SIZE) { // all docs are set if (denseRankPower != -1) { final byte[] rank = createRank(buffer, denseRankPower); out.writeBytes(rank, rank.length); @@ -418,6 +418,7 @@ public static RandomAccessInput createJumpTable( // SPARSE variables boolean exists; + int nextExistDocInBlock = -1; // DENSE variables long word; @@ -495,7 +496,8 @@ private void readBlockHeader() throws IOException { if (numValues <= MAX_ARRAY_LENGTH) { method = Method.SPARSE; blockEnd = slice.getFilePointer() + (numValues << 1); - } else if (numValues == 65536) { + nextExistDocInBlock = -1; + } else if (numValues == BLOCK_SIZE) { method = Method.ALL; blockEnd = slice.getFilePointer(); gap = block - index - 1; @@ -550,6 +552,7 @@ boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException { if (doc >= targetInBlock) { disi.doc = disi.block | doc; disi.exists = true; + disi.nextExistDocInBlock = doc; return true; } } @@ -560,6 +563,10 @@ boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException { boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException { final int targetInBlock = target & 0xFFFF; // TODO: binary search + if (disi.nextExistDocInBlock > targetInBlock) { + assert !disi.exists; + return false; + } if (target == disi.doc) { return disi.exists; } @@ -567,6 +574,7 @@ boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException int doc = Short.toUnsignedInt(disi.slice.readShort()); disi.index++; if (doc >= targetInBlock) { + disi.nextExistDocInBlock = doc; if (doc != targetInBlock) { disi.index--; disi.slice.seek(disi.slice.getFilePointer() - Short.BYTES);