diff --git a/CHANGELOG.md b/CHANGELOG.md index c9d7d9a60a3e5..3d8a5d63579a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support object fields in star-tree index([#16728](https://github.com/opensearch-project/OpenSearch/pull/16728/)) - Support searching from doc_value using termQueryCaseInsensitive/termQuery in flat_object/keyword field([#16974](https://github.com/opensearch-project/OpenSearch/pull/16974/)) - Added a new `time` field to replace the deprecated `getTime` field in `GetStats`. ([#17009](https://github.com/opensearch-project/OpenSearch/pull/17009)) +- Improve performance of the bitmap filtering([#16936](https://github.com/opensearch-project/OpenSearch/pull/16936/)) ### Dependencies - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504)) diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 43e975f95757b..702e5db50e841 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -46,10 +46,8 @@ import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; @@ -73,6 +71,7 @@ import org.opensearch.search.DocValueFormat; import org.opensearch.search.lookup.SearchLookup; import org.opensearch.search.query.BitmapDocValuesQuery; 
+import org.opensearch.search.query.BitmapIndexQuery; import java.io.IOException; import java.math.BigInteger; @@ -81,7 +80,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -888,10 +886,10 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha } if (isSearchable && hasDocValues) { - return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap)); + return new IndexOrDocValuesQuery(new BitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap)); } if (isSearchable) { - return bitmapIndexQuery(field, bitmap); + return new BitmapIndexQuery(field, bitmap); } return new BitmapDocValuesQuery(field, bitmap); } @@ -1507,40 +1505,6 @@ public static Query unsignedLongRangeQuery( } return builder.apply(l, u); } - - static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) { - final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); - return new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { - - final Iterator iterator = bitmap.iterator(); - - @Override - public BytesRef next() { - int value; - if (iterator.hasNext()) { - value = iterator.next(); - } else { - return null; - } - IntPoint.encodeDimension(value, encoded.bytes, 0); - return encoded; - } - }) { - @Override - public Query rewrite(IndexSearcher indexSearcher) throws IOException { - if (bitmap.isEmpty()) { - return new MatchNoDocsQuery(); - } - return super.rewrite(indexSearcher); - } - - @Override - protected String toString(byte[] value) { - assert value.length == Integer.BYTES; - return Integer.toString(IntPoint.decodeDimension(value, 0)); - } - }; - } } /** diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java index 
dfa5fc4567f80..9fce29ae7fbfb 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -30,6 +30,8 @@ import org.roaringbitmap.RoaringBitmap; +import static org.opensearch.search.query.BitmapIndexQuery.checkArgs; + /** * Filter with bitmap *

@@ -43,6 +45,7 @@ public class BitmapDocValuesQuery extends Query implements Accountable { final long max; public BitmapDocValuesQuery(String field, RoaringBitmap bitmap) { + checkArgs(field, bitmap); this.field = field; this.bitmap = bitmap; if (!bitmap.isEmpty()) { @@ -111,8 +114,7 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public String toString(String field) { - // bitmap may contain high cardinality, so choose to not show the actual values in it - return field + " cardinality: " + bitmap.getLongCardinality(); + return "BitmapDocValuesQuery(field=" + this.field + ")"; } @Override @@ -139,8 +141,8 @@ public int hashCode() { @Override public long ramBytesUsed() { - return RamUsageEstimator.shallowSizeOfInstance(BitmapDocValuesQuery.class) + RamUsageEstimator.sizeOfObject(field) - + RamUsageEstimator.sizeOfObject(bitmap); + return RamUsageEstimator.shallowSizeOfInstance(BitmapDocValuesQuery.class) + RamUsageEstimator.sizeOf(field) + bitmap + .getLongSizeInBytes(); } @Override diff --git a/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java new file mode 100644 index 0000000000000..87a7c132be848 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/BitmapIndexQuery.java @@ -0,0 +1,284 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.query; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; +import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Objects; + +import org.roaringbitmap.PeekableIntIterator; +import org.roaringbitmap.RoaringBitmap; + +/** + * A query that matches all documents that contain a set of integer numbers represented by bitmap + * + * @opensearch.internal + */ +public class BitmapIndexQuery extends Query implements Accountable { + + private final RoaringBitmap bitmap; + private final String field; + + public BitmapIndexQuery(String field, RoaringBitmap bitmap) { + checkArgs(field, bitmap); + this.bitmap = bitmap; + this.field = field; + } + + static void checkArgs(String field, RoaringBitmap bitmap) { + if (field == null) { + throw new IllegalArgumentException("field must not be null"); + } + if (bitmap == null) { + throw new IllegalArgumentException("bitmap must not be null"); + } + } + + interface BitmapIterator extends BytesRefIterator { + // wrap IntIterator.next() + BytesRef next(); + + // expose 
PeekableIntIterator.advanceIfNeeded, advance as long as the next value is smaller than target + void advance(byte[] target); + } + + private static BitmapIterator bitmapEncodedIterator(RoaringBitmap bitmap) { + return new BitmapIterator() { + private final PeekableIntIterator iterator = bitmap.getIntIterator(); + private final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); + + public BytesRef next() { + int value; + if (iterator.hasNext()) { + value = iterator.next(); + } else { + return null; + } + IntPoint.encodeDimension(value, encoded.bytes, 0); + return encoded; + } + + public void advance(byte[] target) { + iterator.advanceIfNeeded(IntPoint.decodeDimension(target, 0)); + } + }; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + // get cardinality is not cheap enough to do when supplying scorers, so do it once per weight + final long cardinality = bitmap.getLongCardinality(); + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); + if (scorerSupplier == null) { + return null; + } + return scorerSupplier.get(Long.MAX_VALUE); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + final Weight weight = this; + LeafReader reader = context.reader(); + // get the point value which should be one dimension, since bitmap saves integers + PointValues values = reader.getPointValues(field); + if (values == null) { + return null; + } + if (values.getNumIndexDimensions() != 1) { + throw new IllegalArgumentException("field must have only one dimension"); + } + + return new ScorerSupplier() { + long cost = -1; + + final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field); + final MergePointVisitor visitor = new MergePointVisitor(result); + + @Override + public Scorer get(long 
leadCost) throws IOException { + values.intersect(visitor); + return new ConstantScoreScorer(weight, score(), scoreMode, result.build().iterator()); + } + + @Override + public long cost() { + if (cost == -1) { + // rough estimate of the cost, 20 times penalty is based on the experiment results + // details in https://github.com/opensearch-project/OpenSearch/pull/16936 + cost = cardinality * 20; + } + return cost; + } + }; + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // This query depend only on segment-immutable structure — points + return true; + } + }; + } + + private class MergePointVisitor implements PointValues.IntersectVisitor { + private final DocIdSetBuilder result; + private final BitmapIterator iterator; + private BytesRef nextQueryPoint; + private final ArrayUtil.ByteArrayComparator comparator; + private DocIdSetBuilder.BulkAdder adder; + + public MergePointVisitor(DocIdSetBuilder result) throws IOException { + this.result = result; + this.comparator = ArrayUtil.getUnsignedComparator(Integer.BYTES); + this.iterator = bitmapEncodedIterator(bitmap); + nextQueryPoint = iterator.next(); + } + + @Override + public void grow(int count) { + adder = result.grow(count); + } + + @Override + public void visit(int docID) { + adder.add(docID); + } + + @Override + public void visit(DocIdSetIterator iterator) throws IOException { + adder.add(iterator); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (matches(packedValue)) { + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue)) { + adder.add(iterator); + } + } + + private boolean matches(byte[] packedValue) { + while (nextQueryPoint != null) { + int cmp = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, packedValue, 0); + if (cmp == 0) { + return true; + } else if (cmp < 0) { + // Query point is before index point, so we move to next query point + 
iterator.advance(packedValue); + nextQueryPoint = iterator.next(); + } else { + // Query point is after index point, so we don't collect and we return: + break; + } + } + return false; + } + + @Override + public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + while (nextQueryPoint != null) { + int cmpMin = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, minPackedValue, 0); + if (cmpMin < 0) { + // query point is before the start of this cell + iterator.advance(minPackedValue); + nextQueryPoint = iterator.next(); + continue; + } + int cmpMax = comparator.compare(nextQueryPoint.bytes, nextQueryPoint.offset, maxPackedValue, 0); + if (cmpMax > 0) { + // query point is after the end of this cell + return PointValues.Relation.CELL_OUTSIDE_QUERY; + } + + if (cmpMin == 0 && cmpMax == 0) { + // NOTE: we only hit this if we are on a cell whose min and max values are exactly equal + // to our point, + // which can easily happen if many (> 512) docs share this one value + return PointValues.Relation.CELL_INSIDE_QUERY; + } else { + return PointValues.Relation.CELL_CROSSES_QUERY; + } + } + + // We exhausted all points in the query: + return PointValues.Relation.CELL_OUTSIDE_QUERY; + } + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (bitmap.isEmpty()) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + public String toString(String field) { + return "BitmapIndexQuery(field=" + this.field + ")"; + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + } + } + + @Override + public boolean equals(Object other) { + if (sameClassAs(other) == false) { + return false; + } + BitmapIndexQuery that = (BitmapIndexQuery) other; + return field.equals(that.field) && bitmap.equals(that.bitmap); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, bitmap); 
+ } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.shallowSizeOfInstance(BitmapIndexQuery.class) + RamUsageEstimator.sizeOf(field) + bitmap + .getLongSizeInBytes(); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index c06371bed9357..e1551e225b307 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -76,6 +76,7 @@ import org.opensearch.search.DocValueFormat; import org.opensearch.search.MultiValueMode; import org.opensearch.search.query.BitmapDocValuesQuery; +import org.opensearch.search.query.BitmapIndexQuery; import org.junit.Before; import java.io.ByteArrayInputStream; @@ -962,13 +963,16 @@ public void testBitmapQuery() throws IOException { NumberFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.INTEGER); assertEquals( - new IndexOrDocValuesQuery(NumberType.bitmapIndexQuery("field", r), new BitmapDocValuesQuery("field", r)), + new IndexOrDocValuesQuery(new BitmapIndexQuery("field", r), new BitmapDocValuesQuery("field", r)), ft.bitmapQuery(bitmap) ); ft = new NumberFieldType("field", NumberType.INTEGER, false, false, true, true, null, Collections.emptyMap()); assertEquals(new BitmapDocValuesQuery("field", r), ft.bitmapQuery(bitmap)); + ft = new NumberFieldType("field", NumberType.INTEGER, true, false, false, true, null, Collections.emptyMap()); + assertEquals(new BitmapIndexQuery("field", r), ft.bitmapQuery(bitmap)); + Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig()); DirectoryReader reader = DirectoryReader.open(w); diff --git a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java index 6e293d1ec69fd..d103b335588bc 100644 --- 
a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java +++ b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java @@ -12,14 +12,9 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; import org.opensearch.test.OpenSearchTestCase; @@ -28,12 +23,13 @@ import java.io.IOException; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Set; import org.roaringbitmap.RoaringBitmap; +import static org.opensearch.search.query.BitmapIndexQueryTests.getMatchingValues; + public class BitmapDocValuesQueryTests extends OpenSearchTestCase { private Directory dir; private IndexWriter w; @@ -81,21 +77,7 @@ public void testScore() throws IOException { Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); - List actual = new LinkedList<>(); - for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { - // use doc values to get the actual value of the matching docs and assert - // cannot directly check the docId because test can randomize segment numbers - SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); - Scorer scorer = weight.scorer(leaf); - DocIdSetIterator disi = scorer.iterator(); - int docId; - while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - dv.advanceExact(docId); - for (int count = 0; count < dv.docValueCount(); ++count) { - actual.add((int) 
dv.nextValue()); - } - } - } + List actual = getMatchingValues(weight, searcher.getIndexReader()); List expected = List.of(1, 4); assertEquals(expected, actual); } @@ -128,21 +110,7 @@ public void testScoreMutilValues() throws IOException { Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); - Set actual = new HashSet<>(); - for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { - // use doc values to get the actual value of the matching docs and assert - // cannot directly check the docId because test can randomize segment numbers - SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); - Scorer scorer = weight.scorer(leaf); - DocIdSetIterator disi = scorer.iterator(); - int docId; - while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - dv.advanceExact(docId); - for (int count = 0; count < dv.docValueCount(); ++count) { - actual.add((int) dv.nextValue()); - } - } - } + Set actual = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); Set expected = Set.of(2, 3); assertEquals(expected, actual); } diff --git a/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java new file mode 100644 index 0000000000000..32228fce4c8c5 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/BitmapIndexQueryTests.java @@ -0,0 +1,283 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.TestUtil; +import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import java.util.Set; + +import org.roaringbitmap.RoaringBitmap; + +public class BitmapIndexQueryTests extends OpenSearchTestCase { + private Directory dir; + private IndexWriter w; + private DirectoryReader reader; + private IndexSearcher searcher; + + @Before + public void initSearcher() throws IOException { + dir = newDirectory(); + w = new IndexWriter(dir, newIndexWriterConfig()); + reader = DirectoryReader.open(w); + } + + @After + public void closeAllTheReaders() throws IOException { + reader.close(); + w.close(); + dir.close(); + } + + public void testScore() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); 
+ d.add(new IntField("product_id", 2, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1); + bitmap.add(4); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + List actual = getMatchingValues(weight, searcher.getIndexReader()); + List expected = List.of(1, 4); + assertEquals(expected, actual); + } + + // use doc values to get the actual value of the matching docs + // cannot directly check the docId because test can randomize segment numbers + static List getMatchingValues(Weight weight, IndexReader reader) throws IOException { + List actual = new LinkedList<>(); + for (LeafReaderContext leaf : reader.leaves()) { + SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); + Scorer scorer = weight.scorer(leaf); + DocIdSetIterator disi = scorer.iterator(); + int docId; + while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + dv.advanceExact(docId); + for (int count = 0; count < dv.docValueCount(); ++count) { + actual.add((int) dv.nextValue()); + } + } + } + return actual; + } + + public void testScoreMutilValues() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + 
w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(3); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actual = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); + Set expected = Set.of(2, 3); + assertEquals(expected, actual); + } + + public void testRandomDocumentsAndQueries() throws IOException { + Random random = Randomness.get(); + int valueRange = 10_000; // the range of query values should be within indexed values + + for (int i = 0; i < valueRange + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", i, Field.Store.NO)); + w.addDocument(d); + } + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + // Generate random values for bitmap query + Set queryValues = new HashSet<>(); + int numberOfValues = 5; + for (int i = 0; i < numberOfValues; i++) { + int value = random.nextInt(valueRange) + 1; + queryValues.add(value); + } + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(queryValues.stream().mapToInt(Integer::intValue).toArray()); + + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actualSet = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); + + List expected = new ArrayList<>(queryValues); + Collections.sort(expected); + List actual = new ArrayList<>(actualSet); + Collections.sort(actual); + assertEquals(expected, actual); + } + + public void testCheckArgsNullBitmap() { + /** + * Test that checkArgs throws IllegalArgumentException when bitmap is null + */ + assertThrows(IllegalArgumentException.class, () -> BitmapIndexQuery.checkArgs("field", null)); + } + + public void 
testCheckArgsNullField() { + /** + * Test that checkArgs throws IllegalArgumentException when field is null + */ + RoaringBitmap bitmap = new RoaringBitmap(); + assertThrows(IllegalArgumentException.class, () -> BitmapIndexQuery.checkArgs(null, bitmap)); + } + + public void testCheckArgsWithNullBitmap() { + assertThrows(IllegalArgumentException.class, () -> { BitmapIndexQuery.checkArgs("product_id", null); }); + } + + public void testCheckArgsWithNullFieldAndBitmap() { + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> { BitmapIndexQuery.checkArgs(null, null); } + ); + assertEquals("field must not be null", exception.getMessage()); + } + + public void testCreateWeight() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + Weight weight = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f); + assertNotNull(weight); + Scorer scorer = weight.scorer(reader.leaves().get(0)); + assertNotNull(scorer); + ScorerSupplier supplier = weight.scorerSupplier(reader.leaves().get(0)); + assertNotNull(supplier); + long cost = supplier.cost(); + assertEquals(20, cost); + } + + public void testRewrite() throws IOException { + RoaringBitmap bitmap = new RoaringBitmap(); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + assertEquals(new MatchNoDocsQuery(), query.rewrite(searcher)); + } + + public void testPointVisitor() throws IOException { + w.close(); + // default codec uses 512 documents per leaf node, so we can cover the visit disi methods in PointVisitor + w = new IndexWriter(dir, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec())); + + for (int i = 0; i < 512 + 1; i++) { + Document d = new Document(); + 
d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + } + + for (int i = 0; i < 256 + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + w.addDocument(d); + } + + for (int i = 0; i < 256 + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + } + + for (int i = 0; i < 512 + 1; i++) { + Document d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + } + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(0, 1, 2, 3, 5); + BitmapIndexQuery query = new BitmapIndexQuery("product_id", bitmap); + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actual = new HashSet<>(getMatchingValues(weight, searcher.getIndexReader())); + Set expected = Set.of(1, 2, 3); + assertEquals(expected, actual); + } +}