From f737c00eab4ba0a846b4f03406f54b4fa3c9f635 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 21 Jan 2025 10:39:32 +0100 Subject: [PATCH] Implement #intoBitSet on `IntArrayDocIdSet` and `RoaringDocIdSet`. (#14135) These doc id sets can implement `#intoBitSet` in a way that auto-vectorizes. For reference, `RoaringDocIdSet` is used by the query cache, and `IntArrayDocIdSet` is used by point queries. --- .../lucene/search/DocIdSetIterator.java | 2 ++ .../apache/lucene/util/IntArrayDocIdSet.java | 15 ++++++++ .../apache/lucene/util/RoaringDocIdSet.java | 34 +++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java b/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java index e0bee1da2314..421323440865 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java @@ -228,6 +228,8 @@ protected final int slowAdvance(int target) throws IOException { * *

Note: It is important not to clear bits from {@code bitSet} that may be already set. * + *

Note: {@code offset} may be negative. + * * @lucene.internal */ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java b/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java index d44cc7839233..4f764b37dfd9 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java @@ -95,6 +95,21 @@ public int advance(int target) throws IOException { return doc = docs[i++]; } + @Override + public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException { /* Sets bit (docs[k] - offset) in bitSet for each k from the index of the current doc up to, but excluding, the index returned by findNextGEQ, then repositions doc and i on that index. */ + if (doc >= upTo) { /* current doc is already at or past upTo: nothing to set and the position is left untouched */ + return; + } + + int from = i - 1; /* the visible advance() context does doc = docs[i++], which leaves i one past the current doc, so i - 1 is its index. NOTE(review): if i can still be 0 here, from is -1; looks like callers must position the iterator first - confirm */ + int to = VectorUtil.findNextGEQ(docs, upTo, from, length); /* presumably the first index in [from, length) whose value is >= upTo, or length when there is none - confirm against VectorUtil */ + for (int i = from; i < to; ++i) { /* this loop variable shadows the iterator field i */ + bitSet.set(docs[i] - offset); + } + doc = docs[to]; /* when to == length this reads docs[length]; assumes a sentinel entry is stored there - TODO confirm */ + i = to + 1; /* restore the convention that i is one past the current doc */ + } + @Override public long cost() { return length; diff --git a/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java b/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java index ccd92a74250e..77038dd07eda 100644 --- a/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/RoaringDocIdSet.java @@ -217,6 +217,20 @@ public int advance(int target) throws IOException { return doc = docId(i); } } + + @Override + public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException { /* Sets bit (docId(k) - offset) in bitSet for each index k from the position held before advance(upTo) up to, but excluding, the position held after it. */ + if (doc >= upTo) { /* current doc is already at or past upTo: nothing to set */ + return; + } + + int from = i; /* in this iterator i is read as the index of the current doc: the visible advance() context ends with doc = docId(i) */ + advance(upTo); /* moves i and doc forward; relied on to leave i at the first index whose doc is >= upTo - confirm what i holds when the iterator is exhausted */ + int to = i; + for (int i = from; i < to; ++i) { /* this loop variable shadows the iterator field i */ + bitSet.set(docId(i) - offset); + } + } }; } } @@ -312,6 +326,26 @@ private int firstDocFromNextBlock() throws IOException { } } + @Override + public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException { /* Walks the blocks starting at the current one, delegating to sub.intoBitSet with upTo and offset both shifted by the start of the block (block << 16). Stops when upTo falls before the block, when firstDocFromNextBlock() returns NO_MORE_DOCS, or when sub stops on a doc other than NO_MORE_DOCS. */ + for (; ; ) { + int subUpto = upTo - (block << 16); /* upTo expressed relative to the start of the current block */ + if (subUpto < 0) { /* upTo lies before the start of this block */ + break; + } + int subOffset = offset - (block << 16); /* chosen so that subDoc - subOffset equals ((block << 16) | subDoc) - offset */ + sub.intoBitSet(subUpto, bitSet, 
subOffset); + if (sub.docID() == NO_MORE_DOCS) { /* sub reports NO_MORE_DOCS for this block */ + if (firstDocFromNextBlock() == NO_MORE_DOCS) { /* presumably no block is left; doc is not assigned here, so it is assumed firstDocFromNextBlock() updates it - confirm. Otherwise the loop repeats with the new block */ + break; + } + } else { + doc = (block << 16) | sub.docID(); /* rebuild the global doc from the block number and the in-block doc */ + break; + } + } + } + @Override public long cost() { return cardinality;