Skip to content

Commit

Permalink
bike shedding
Browse files Browse the repository at this point in the history
  • Loading branch information
mkhludnev committed Jan 2, 2025
1 parent d848f77 commit e63d9ae
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import java.util.List;
import java.util.Objects;
import java.util.TreeSet;
import java.util.function.BiConsumer;
import java.util.function.Supplier;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
Expand Down Expand Up @@ -82,14 +80,13 @@ public int hashCode() {
* behaves like {@link SortedSetDocValuesField#newSlowRangeQuery(String, BytesRef, BytesRef,
* boolean, boolean)} with both true arguments
*/
public static class SordedSetStabbingFixedBuilder
implements BiConsumer<BytesRef, BytesRef>, Supplier<Query> {
public static class SortedSetStabbingFixedBuilder {
protected final String fieldName;
protected final List<Range> clauses = new ArrayList<>();
final List<Range> clauses = new ArrayList<>();
protected final int bytesPerDim;
protected final ArrayUtil.ByteArrayComparator comparator;

public SordedSetStabbingFixedBuilder(String fieldName, int bytesPerDim) {
public SortedSetStabbingFixedBuilder(String fieldName, int bytesPerDim) {
this.fieldName = Objects.requireNonNull(fieldName);
if (bytesPerDim <= 0) {
throw new IllegalArgumentException("bytesPerDim should be a valid value");
Expand All @@ -99,15 +96,11 @@ public SordedSetStabbingFixedBuilder(String fieldName, int bytesPerDim) {
}

// TODO support nulls as min,max boundaries ???
public SordedSetStabbingFixedBuilder add(BytesRef lowerValue, BytesRef upperValue) {
/** NB: deeply copies the given bytes. */
public SortedSetStabbingFixedBuilder add(BytesRef lowerValue, BytesRef upperValue) {
BytesRef lowRef = BytesRef.deepCopyOf(lowerValue);
BytesRef upRef = BytesRef.deepCopyOf(upperValue);
if (this.comparator.compare(lowRef.bytes, 0, upRef.bytes, 0) > 0) {
// TODO let's just ignore so far.
// throw new IllegalArgumentException("lower must be <= upperValue");
} else {
clauses.add(new Range(lowRef, upRef));
}
clauses.add(new Range(lowRef, upRef));
return this;
}

Expand All @@ -127,25 +120,15 @@ SortedSetDocValuesMultiRangeQuery createSortedSetDocValuesMultiRangeQuery() {
return new SortedSetDocValuesMultiRangeQuery(
fieldName, clauses, this.bytesPerDim, comparator);
}

@Override
public void accept(BytesRef bytesRef, BytesRef bytesRef2) {
add(bytesRef, bytesRef2);
}

@Override
public Query get() {
return build();
}
}

/**
* Builder like {@link SordedSetStabbingFixedBuilder} but using log(ranges) lookup per doc value
* Builder like {@link SortedSetStabbingFixedBuilder} but using log(ranges) lookup per doc value
* instead of bitset check
*/
public static class SordedSetStabbingFixedTreeBuilder extends SordedSetStabbingFixedBuilder {
public static class SortedSetStabbingFixedTreeBuilder extends SortedSetStabbingFixedBuilder {

public SordedSetStabbingFixedTreeBuilder(String fieldName, int bytesPerDim) {
public SortedSetStabbingFixedTreeBuilder(String fieldName, int bytesPerDim) {
super(fieldName, bytesPerDim);
}

Expand All @@ -159,24 +142,24 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
return new MultiRangeWeight(boost, scoreMode) {
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
if (context.reader().getFieldInfos().fieldInfo(field) == null) {
if (context.reader().getFieldInfos().fieldInfo(fieldName) == null) {
return null;
}
SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
SortedSetDocValues values = DocValues.getSortedSet(context.reader(), fieldName);

return new MultiRangeScorerSupplier(values, context) {
@Override
public Scorer get(long leadCost) throws IOException {
assert !rangeClauses.isEmpty() : "Builder should prevent it";
TreeSet<OrdRange> ordRanges =
new TreeSet<>((or1, or2) -> (int) (or1.lower - or2.lower));
bytesRangesToOrdRanges(this.values, ordRanges);
createOrdRanges(this.values, ordRanges);
if (ordRanges.isEmpty()) {
return empty();
}
long minOrd = ordRanges.getFirst().lower, maxOrd = ordRanges.getLast().upper;

DocValuesSkipper skipper = this.context.reader().getDocValuesSkipper(field);
DocValuesSkipper skipper = this.context.reader().getDocValuesSkipper(fieldName);

if (skipper != null
&& (minOrd > skipper.maxValue() || maxOrd < skipper.minValue())) {
Expand All @@ -185,6 +168,10 @@ public Scorer get(long leadCost) throws IOException {

TwoPhaseIterator iterator;
SortedSetDocValues docValues = this.values;
int depth = 1;
for (int pow = 1; pow < ordRanges.size(); pow <<= 1, depth += 1)
;
int finalDepth = depth;
iterator =
new TwoPhaseIterator(docValues) {
// TODO unwrap singleton?
Expand All @@ -206,7 +193,7 @@ public boolean matches() throws IOException {

@Override
public float matchCost() {
return 2; // 2 comparisons
return finalDepth; // about log2(ordRanges.size()) + 1 comparisons
}
};
// }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,22 @@ private static final class Edge {
private final boolean point;
private final boolean upper;

private static Edge createPoint(DocValuesMultiRangeQuery.Range r) {
return new Edge(r);
}

BytesRef getValue() {
return upper ? range.upper : range.lower;
}

public Edge(DocValuesMultiRangeQuery.Range range, boolean upper) {
private Edge(DocValuesMultiRangeQuery.Range range, boolean upper) {
this.range = range;
this.upper = upper;
this.point = false;
}

/** expecting Arrays.equals(lower.bytes,upper.bytes) i.e. point */
public Edge(DocValuesMultiRangeQuery.Range range) {
private Edge(DocValuesMultiRangeQuery.Range range) {
this.range = range;
this.upper = false;
this.point = true;
Expand All @@ -79,21 +83,22 @@ public OrdRange(long lower, long upper) {
}
}

protected final String field;
protected final String fieldName;
private final int bytesPerDim;
protected final List<DocValuesMultiRangeQuery.Range> rangeClauses;

SortedSetDocValuesMultiRangeQuery(
String name,
String fieldName,
List<DocValuesMultiRangeQuery.Range> clauses,
int bytesPerDim,
ArrayUtil.ByteArrayComparator comparator) {
this.field = name;
this.fieldName = fieldName;
this.bytesPerDim = bytesPerDim;
this.rangeClauses = resolveOverlaps(clauses, comparator);
}

private static ArrayList<DocValuesMultiRangeQuery.Range> resolveOverlaps(
/** Merges overlapping ranges. */
private static List<DocValuesMultiRangeQuery.Range> resolveOverlaps(
List<DocValuesMultiRangeQuery.Range> clauses, ArrayUtil.ByteArrayComparator comparator) {
ArrayList<DocValuesMultiRangeQuery.Range> sortedClauses = new ArrayList<>();
PriorityQueue<Edge> heap =
Expand All @@ -106,7 +111,7 @@ protected boolean lessThan(Edge a, Edge b) {
for (DocValuesMultiRangeQuery.Range r : clauses) {
int cmp = cmp(comparator, r.lower, r.upper);
if (cmp == 0) {
heap.add(new Edge(r));
heap.add(Edge.createPoint(r));
} else {
if (cmp < 0) {
heap.add(new Edge(r, false));
Expand Down Expand Up @@ -169,14 +174,20 @@ public String toString(String fld) {
'}';
}

// TODO how to handle reverse ranges ???
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new MultiRangeWeight(boost, scoreMode);
}

protected void bytesRangesToOrdRanges(SortedSetDocValues values, Collection<OrdRange> ordRanges)
/**
* Resolves ordinals for {@linkplain #rangeClauses}. Caveat: a range may still be updated after it
* has been inserted into {@code ordRanges}.
*
* @param values doc values to lookup ordinals
* @param ordRanges destination collection for ord ranges
*/
protected void createOrdRanges(SortedSetDocValues values, Collection<OrdRange> ordRanges)
throws IOException {
TermsEnum termsEnum = values.termsEnum();
OrdRange previous = null;
Expand Down Expand Up @@ -219,7 +230,7 @@ protected void bytesRangesToOrdRanges(SortedSetDocValues values, Collection<OrdR

@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
if (visitor.acceptField(fieldName)) {
visitor.visitLeaf(this);
}
}
Expand All @@ -229,14 +240,14 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SortedSetDocValuesMultiRangeQuery that = (SortedSetDocValuesMultiRangeQuery) o;
return Objects.equals(field, that.field)
return Objects.equals(fieldName, that.fieldName)
&& bytesPerDim == that.bytesPerDim
&& Objects.equals(rangeClauses, that.rangeClauses);
}

@Override
public int hashCode() {
return Objects.hash(field, bytesPerDim, rangeClauses);
return Objects.hash(fieldName, bytesPerDim, rangeClauses);
}

protected class MultiRangeWeight extends ConstantScoreWeight {
Expand All @@ -249,10 +260,10 @@ public MultiRangeWeight(float boost, ScoreMode scoreMode) {

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
if (context.reader().getFieldInfos().fieldInfo(field) == null) {
if (context.reader().getFieldInfos().fieldInfo(fieldName) == null) {
return null;
}
SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
SortedSetDocValues values = DocValues.getSortedSet(context.reader(), fieldName);

return new MultiRangeScorerSupplier(values, context);
}
Expand All @@ -261,7 +272,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti

@Override
public boolean isCacheable(LeafReaderContext ctx) {
return DocValues.isCacheable(ctx, field);
return DocValues.isCacheable(ctx, fieldName);
}

protected class MultiRangeScorerSupplier extends ScorerSupplier {
Expand All @@ -277,14 +288,14 @@ public MultiRangeScorerSupplier(SortedSetDocValues values, LeafReaderContext con
public Scorer get(long leadCost) throws IOException {
assert !rangeClauses.isEmpty() : "Builder should prevent it";
List<OrdRange> ordRanges = new ArrayList<>();
bytesRangesToOrdRanges(values, ordRanges);
createOrdRanges(values, ordRanges);
if (ordRanges.isEmpty()) {
return empty();
}
LongBitSet matchingOrdsShifted = null;
long minOrd = ordRanges.getFirst().lower, maxOrd = ordRanges.getLast().upper;

DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(fieldName);

if (skipper != null && (minOrd > skipper.maxValue() || maxOrd < skipper.minValue())) {
return empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ public byte[] get() {
for (int pass = 0; pass < atLeast(10); pass++) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(4);
DocValuesMultiRangeQuery.SordedSetStabbingFixedBuilder qbuilder =
new DocValuesMultiRangeQuery.SordedSetStabbingFixedBuilder(
DocValuesMultiRangeQuery.SortedSetStabbingFixedBuilder qbuilder =
new DocValuesMultiRangeQuery.SortedSetStabbingFixedBuilder(
"field", InetAddressPoint.BYTES);
for (int q = 0; q < atLeast(10); q++) {
byte[] alfa = random().nextBoolean() ? getRandomIpBytes() : pivotIpsStream.get();
Expand All @@ -155,7 +155,7 @@ public byte[] get() {
}
// ranges.add(InetAddress.getByAddress(alfa));
// ranges.add(InetAddress.getByAddress(beta));
qbuilder.accept(
qbuilder.add(
new BytesRef(InetAddressPoint.encode(InetAddress.getByAddress(alfa))),
new BytesRef(InetAddressPoint.encode(InetAddress.getByAddress(beta))));

Expand All @@ -169,7 +169,7 @@ public byte[] get() {
BooleanClause.Occur.SHOULD);
}
// InetAddress[] addr = ranges.toArray(new InetAddress[0]);
Query multiRange = qbuilder.get();
Query multiRange = qbuilder.build();
long cnt;
BooleanQuery orRanges = bq.build();
if (pass == 0) {
Expand Down Expand Up @@ -220,14 +220,14 @@ private static SortedSetDocValuesField getIpField(String field, byte[] ip)
}

private static Query rangeQuery(String field, InetAddress... addr) throws UnknownHostException {
DocValuesMultiRangeQuery.SordedSetStabbingFixedBuilder qbuilder =
new DocValuesMultiRangeQuery.SordedSetStabbingFixedBuilder(field, InetAddressPoint.BYTES);
DocValuesMultiRangeQuery.SortedSetStabbingFixedBuilder qbuilder =
new DocValuesMultiRangeQuery.SortedSetStabbingFixedBuilder(field, InetAddressPoint.BYTES);
for (int i = 0; i < addr.length; i += 2) {
qbuilder.accept(
qbuilder.add(
new BytesRef(InetAddressPoint.encode(addr[i])),
new BytesRef(InetAddressPoint.encode(addr[i + 1])));
}
return qbuilder.get();
return qbuilder.build();
}

public static byte[] concatenateByteArrays(byte[] array1, byte[] array2) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ public void testDuelWithStandardDisjunction() throws IOException {
int numRanges = RandomNumbers.randomIntBetween(random(), 1, 20);
LongPointMultiRangeBuilder builder1 = new LongPointMultiRangeBuilder("point", dims);
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
DocValuesMultiRangeQuery.SordedSetStabbingFixedBuilder builder3 =
DocValuesMultiRangeQuery.SortedSetStabbingFixedBuilder builder3 =
// new DocValuesMultiRangeQuery.SortedSetStabbingFixedBuilder("docVal", Long.BYTES);
new DocValuesMultiRangeQuery.SordedSetStabbingFixedTreeBuilder("docVal", Long.BYTES);
new DocValuesMultiRangeQuery.SortedSetStabbingFixedTreeBuilder("docVal", Long.BYTES);

for (int i = 0; i < numRanges; i++) {
long[] lower = new long[dims];
Expand Down

0 comments on commit e63d9ae

Please sign in to comment.