Skip to content

Commit

Permalink
Remove acceptDocs argument from DocIdSetIterator#intoBitSet and introduce `Bits#applyMask`.
Browse files Browse the repository at this point in the history

Most `DocIdSetIterator` implementations can no longer implement `#intoBitSet`
efficiently as soon as there are live docs. So this commit removes the `acceptDocs`
argument and instead introduces a new `Bits#applyMask` API that helps clear bits in a
bit set when the corresponding doc ID is not live.

Relates apache#14133
  • Loading branch information
jpountz committed Jan 13, 2025
1 parent c20e09e commit 932cccb
Show file tree
Hide file tree
Showing 12 changed files with 296 additions and 137 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
Expand Down Expand Up @@ -878,16 +877,13 @@ public int advance(int target) throws IOException {
}

@Override
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
if (doc >= upTo) {
return;
}

// Handle the current doc separately, it may be on the previous docBuffer.
if (acceptDocs == null || acceptDocs.get(doc)) {
bitSet.set(doc - offset);
}
bitSet.set(doc - offset);

for (; ; ) {
if (docBufferUpto == BLOCK_SIZE) {
Expand All @@ -898,7 +894,7 @@ public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset
int start = docBufferUpto;
int end = computeBufferEndBoundary(upTo);
if (end != 0) {
bufferIntoBitSet(start, end, acceptDocs, bitSet, offset);
bufferIntoBitSet(start, end, bitSet, offset);
doc = docBuffer[end - 1];
}
docBufferUpto = end;
Expand All @@ -922,15 +918,12 @@ private int computeBufferEndBoundary(int upTo) {
}
}

private void bufferIntoBitSet(
int start, int end, Bits acceptDocs, FixedBitSet bitSet, int offset) throws IOException {
// acceptDocs#get (if backed by FixedBitSet), bitSet#set and `doc - offset` get
// auto-vectorized
private void bufferIntoBitSet(int start, int end, FixedBitSet bitSet, int offset)
throws IOException {
// bitSet#set and `doc - offset` get auto-vectorized
for (int i = start; i < end; ++i) {
int doc = docBuffer[i];
if (acceptDocs == null || acceptDocs.get(doc)) {
bitSet.set(doc - offset);
}
bitSet.set(doc - offset);
}
}

Expand Down
61 changes: 29 additions & 32 deletions lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,37 +164,6 @@ public long cost() {
return cost;
}

/**
 * Consumes the documents of {@code w}'s iterator that are below {@code max} and records them in
 * the enclosing scorer's {@code matching} bit set, optionally updating per-slot {@code buckets}
 * with match frequency and score. On return, {@code w.doc} reflects the iterator's current doc.
 *
 * @param w wrapper whose {@code iterator}, {@code scorable} and {@code doc} fields are used
 * @param acceptDocs filter for docs to record; {@code null} means every doc is accepted
 * @param min the iterator is advanced to this doc ID first if {@code w.doc} is behind it
 * @param max exclusive upper bound on the doc IDs consumed
 * @throws IOException if the underlying iterator or scorer throws
 */
private void scoreDisiWrapperIntoBitSet(DisiWrapper w, Bits acceptDocs, int min, int max)
throws IOException {
// Copy the enclosing scorer's fields into locals before the loop.
boolean needsScores = BooleanScorer.this.needsScores;
FixedBitSet matching = BooleanScorer.this.matching;
Bucket[] buckets = BooleanScorer.this.buckets;

DocIdSetIterator it = w.iterator;
Scorable scorer = w.scorable;
int doc = w.doc;
// Catch the iterator up to the lower bound if it is still behind it.
if (doc < min) {
doc = it.advance(min);
}
if (buckets == null) {
// No per-doc bookkeeping: let the iterator bulk-load matches into the bit set.
// `doc & ~MASK` clears the low bits of doc and is passed as the bit set's offset
// (presumably the base doc ID of the current window -- confirm against MASK's definition).
it.intoBitSet(acceptDocs, max, matching, doc & ~MASK);
} else {
// Per-doc path: frequency (and score when needed) must be accumulated for each match.
for (; doc < max; doc = it.nextDoc()) {
if (acceptDocs == null || acceptDocs.get(doc)) {
// `doc & MASK` keeps only the low bits of doc; it indexes both `matching` and `buckets`.
final int i = doc & MASK;
matching.set(i);
final Bucket bucket = buckets[i];
bucket.freq++;
if (needsScores) {
bucket.score += scorer.score();
}
}
}
}

// Remember where the iterator stopped.
w.doc = it.docID();
}

private void scoreWindowIntoBitSetAndReplay(
LeafCollector collector,
Bits acceptDocs,
Expand All @@ -207,7 +176,35 @@ private void scoreWindowIntoBitSetAndReplay(
for (int i = 0; i < numScorers; ++i) {
final DisiWrapper w = scorers[i];
assert w.doc < max;
scoreDisiWrapperIntoBitSet(w, acceptDocs, min, max);

DocIdSetIterator it = w.iterator;
int doc = w.doc;
if (doc < min) {
doc = it.advance(min);
}
if (buckets == null) {
// This doesn't apply live docs, so we'll need to apply them later
it.intoBitSet(max, matching, base);
} else {
for (; doc < max; doc = it.nextDoc()) {
if (acceptDocs == null || acceptDocs.get(doc)) {
final int d = doc & MASK;
matching.set(d);
final Bucket bucket = buckets[d];
bucket.freq++;
if (needsScores) {
bucket.score += w.scorable.score();
}
}
}
}

w.doc = it.docID();
}

if (buckets == null && acceptDocs != null) {
// In this case, live docs have not been applied yet.
acceptDocs.applyMask(matching, base);
}

docIdStreamView.base = base;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,11 @@ private void scoreWindowUsingBitSet(
assert clauseWindowMatches.scanIsEmpty();

int offset = lead.docID();
lead.intoBitSet(acceptDocs, max, windowMatches, offset);
lead.intoBitSet(max, windowMatches, offset);
if (acceptDocs != null) {
// Apply live docs.
acceptDocs.applyMask(windowMatches, offset);
}

int upTo = 0;
for (;
Expand All @@ -116,9 +120,7 @@ private void scoreWindowUsingBitSet(
if (other.docID() < offset) {
other.advance(offset);
}
// No need to apply acceptDocs on other clauses since we already applied live docs on the
// leading clause.
other.intoBitSet(null, max, clauseWindowMatches, offset);
other.intoBitSet(max, clauseWindowMatches, offset);
windowMatches.and(clauseWindowMatches);
clauseWindowMatches.clear();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import java.util.Collection;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

/**
Expand Down Expand Up @@ -150,17 +149,16 @@ public int advance(int target) throws IOException {
}

@Override
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
while (leadTop.doc < upTo) {
leadTop.approximation.intoBitSet(acceptDocs, upTo, bitSet, offset);
leadTop.approximation.intoBitSet(upTo, bitSet, offset);
leadTop.doc = leadTop.approximation.docID();
leadTop = leadIterators.updateTop();
}

minOtherDoc = Integer.MAX_VALUE;
for (DisiWrapper w : otherIterators) {
w.approximation.intoBitSet(acceptDocs, upTo, bitSet, offset);
w.approximation.intoBitSet(upTo, bitSet, offset);
w.doc = w.approximation.docID();
minOtherDoc = Math.min(minOtherDoc, w.doc);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.lucene.search;

import java.io.IOException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

/**
Expand Down Expand Up @@ -220,9 +219,7 @@ protected final int slowAdvance(int target) throws IOException {
*
* <pre class="prettyprint">
* for (int doc = docID(); doc &lt; upTo; doc = nextDoc()) {
* if (acceptDocs == null || acceptDocs.get(doc)) {
* bitSet.set(doc - offset);
* }
* bitSet.set(doc - offset);
* }
* </pre>
*
Expand All @@ -233,13 +230,10 @@ protected final int slowAdvance(int target) throws IOException {
*
* @lucene.internal
*/
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
assert offset <= docID();
for (int doc = docID(); doc < upTo; doc = nextDoc()) {
if (acceptDocs == null || acceptDocs.get(doc)) {
bitSet.set(doc - offset);
}
bitSet.set(doc - offset);
}
}
}
17 changes: 5 additions & 12 deletions lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,20 +99,13 @@ public long cost() {
}

@Override
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
// TODO: Can we also optimize the case when acceptDocs is not null?
if (acceptDocs == null
&& offset < bits.length()
&& bits instanceof FixedBitSet fixedBits
// no bits are set between `offset` and `doc`
&& fixedBits.nextSetBit(offset) == doc
// the whole `bitSet` is getting filled
&& (upTo - offset == bitSet.length())) {
bitSet.orRange(fixedBits, offset);
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
upTo = Math.min(upTo, bits.length());
if (upTo > doc && bits instanceof FixedBitSet fixedBits) {
FixedBitSet.orRange(fixedBits, doc, bitSet, doc - offset, upTo - doc);
advance(upTo); // set the current doc
} else {
super.intoBitSet(acceptDocs, upTo, bitSet, offset);
super.intoBitSet(upTo, bitSet, offset);
}
}
}
28 changes: 28 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/Bits.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;

/**
* Interface for Bitset-like structures.
*
Expand All @@ -34,6 +36,32 @@ public interface Bits {
/** Returns the number of bits in this set */
int length();

/**
 * Masks the given {@link FixedBitSet} with this {@code Bits} instance, where bit {@code i} of
 * {@code bitSet} corresponds to index {@code offset + i} of this instance.
 *
 * <p>Every set bit {@code i} of {@code bitSet} for which {@code get(offset + i)} returns {@code
 * false} is cleared; bits for which it returns {@code true} stay set. Overriding implementations
 * should behave the same way as this default implementation, which is equivalent to:
 *
 * <pre class="prettyprint">
 * for (int i = bitSet.nextSetBit(0);
 *     i != DocIdSetIterator.NO_MORE_DOCS;
 *     i = i + 1 &gt;= bitSet.length() ? DocIdSetIterator.NO_MORE_DOCS : bitSet.nextSetBit(i + 1)) {
 *   if (get(offset + i) == false) {
 *     bitSet.clear(i);
 *   }
 * }
 * </pre>
 *
 * @param bitSet the bit set to mask in place
 * @param offset the index of this instance that bit 0 of {@code bitSet} maps to
 */
default void applyMask(FixedBitSet bitSet, int offset) {
  int bit = bitSet.nextSetBit(0);
  while (bit != DocIdSetIterator.NO_MORE_DOCS) {
    if (get(offset + bit) == false) {
      bitSet.clear(bit);
    }
    // Only look for a following set bit while the start index is still inside the bit set.
    final int next = bit + 1;
    if (next >= bitSet.length()) {
      bit = DocIdSetIterator.NO_MORE_DOCS;
    } else {
      bit = bitSet.nextSetBit(next);
    }
  }
}

Bits[] EMPTY_ARRAY = new Bits[0];

/** Bits impl of the specified length with all bits set. */
Expand Down
Loading

0 comments on commit 932cccb

Please sign in to comment.