diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java index 639bdbd73339..0ed8e235fa90 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java @@ -575,7 +575,10 @@ boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException return true; } } - disi.exists = false; + disi.exists = + false; // in this previous version of DISI we were also setting disi.exists as false for + // a doc that was backwards (and were not failing on an assert). Do we want to do that for + // Lucene 9.x? return false; } }, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java index 512ab4b1e556..b6f952809107 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java @@ -564,20 +564,22 @@ boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException final int targetInBlock = target & 0xFFFF; // TODO: binary search if (disi.nextExistDocInBlock > targetInBlock) { - assert !disi.exists; + assert !disi.exists; // this assert statement should not exist? + // Why do we check the previous state of the disi and assert on it? return false; } if (target == disi.doc) { - return disi.exists; + return disi.exists; // cursor is exactly at the target doc } for (; disi.index < disi.nextBlockIndex; ) { - int doc = Short.toUnsignedInt(disi.slice.readShort()); + int doc = Short.toUnsignedInt(disi.slice.readShort()); // read document at cursor position disi.index++; if (doc >= targetInBlock) { disi.nextExistDocInBlock = doc; if (doc != targetInBlock) { disi.index--; - disi.slice.seek(disi.slice.getFilePointer() - Short.BYTES); + disi.slice.seek( + disi.slice.getFilePointer() - Short.BYTES); // cursor goes back by one doc break; } disi.exists = true; @@ -674,12 +676,18 @@ boolean advanceExactWithinBlock(IndexedDISI disi, int target) { /** * Advance to the first doc from the block that is equal to or greater than {@code target}. * Return true if there is such a doc and false otherwise. + * + *

TODO: We need better javadoc here as well. */ abstract boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException; /** * Advance the iterator exactly to the position corresponding to the given {@code target} and * return whether this document exists. + * + *

TODO: We need better javadoc here to explain what exactly is the behavior if you were to + * call it with a target that is behind the current position of the cursor, if it is exactly at + * the position of the cursor. */ abstract boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException; } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestIndexedDISI.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestIndexedDISI.java index 3ba3db815654..37f4616766b8 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestIndexedDISI.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestIndexedDISI.java @@ -250,6 +250,56 @@ private void doTestAllSingleJump(BitSet set, Directory dir) throws IOException { } } + public void testAdvanceExactFailingOnAssert() throws IOException { + final int B = 65536; + final int blockCount = 5; + BitSet set = new SparseFixedBitSet(blockCount * B); + + // create a SPARSE bitset + for (int block = 0; block < blockCount; block++) { + for (int docID = block * B; docID < (block + 1) * B; docID += 101) { + set.set(docID); + } + } + + try (Directory dir = newDirectory()) { + final int cardinality = set.cardinality(); + final byte denseRankPower = + rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable + long length; + int jumpTableentryCount; + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { + jumpTableentryCount = + IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower); + length = out.getFilePointer(); + } + + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + for (int i = 0; i < set.length(); i++) { + IndexedDISI disi = + new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); + // just testing a basic advanceExact + assertEquals( + "The bit at " + i + " should be correct with advanceExact", + set.get(i), + disi.advanceExact(i)); + + // If the current advance succeeds, and if you try to advance on the previous non-set doc, + // you should ideally get an IAE error (or should get a false as a return value) not an + // assertion error. + if (set.get(i)) { + if (i > 0) { + if (set.get(i - 1) + == false) { // this should ideally throw an IAE, not an assertion error + assertFalse(disi.advanceExact(i - 1)); // fails on the assert + } + } + } + } + } + } + } + public void testOneDoc() throws IOException { int maxDoc = TestUtil.nextInt(random(), 1, 100000); BitSet set = new SparseFixedBitSet(maxDoc);