diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 453c3695a481..faf85efa31a0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -76,6 +76,8 @@ Optimizations
 * GITHUB#11857, GITHUB#11859, GITHUB#11893, GITHUB#11909: Hunspell: improved suggestion performance
   (Peter Gromov)
 
+* GITHUB#12328: Optimize ConjunctionDISI.createConjunction (Armin Braun)
+
 Bug Fixes
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
index b70224f1ec50..4fc74b4f0871 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@@ -20,7 +20,6 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitSet;
@@ -99,24 +98,26 @@ static DocIdSetIterator createConjunction(
         allIterators.size() > 0
             ? allIterators.get(0).docID()
             : twoPhaseIterators.get(0).approximation.docID();
-    boolean iteratorsOnTheSameDoc = allIterators.stream().allMatch(it -> it.docID() == curDoc);
-    iteratorsOnTheSameDoc =
-        iteratorsOnTheSameDoc
-            && twoPhaseIterators.stream().allMatch(it -> it.approximation().docID() == curDoc);
-    if (iteratorsOnTheSameDoc == false) {
-      throw new IllegalArgumentException(
-          "Sub-iterators of ConjunctionDISI are not on the same document!");
+    long minCost = Long.MAX_VALUE;
+    for (DocIdSetIterator allIterator : allIterators) {
+      if (allIterator.docID() != curDoc) {
+        throwSubIteratorsNotOnSameDocument();
+      }
+      minCost = Math.min(allIterator.cost(), minCost);
+    }
+    for (TwoPhaseIterator it : twoPhaseIterators) {
+      if (it.approximation().docID() != curDoc) {
+        throwSubIteratorsNotOnSameDocument();
+      }
     }
-
-    long minCost = allIterators.stream().mapToLong(DocIdSetIterator::cost).min().getAsLong();
     List<BitSetIterator> bitSetIterators = new ArrayList<>();
     List<DocIdSetIterator> iterators = new ArrayList<>();
     for (DocIdSetIterator iterator : allIterators) {
-      if (iterator.cost() > minCost && iterator instanceof BitSetIterator) {
+      if (iterator instanceof BitSetIterator bitSetIterator && bitSetIterator.cost() > minCost) {
         // we put all bitset iterators into bitSetIterators
         // except if they have the minimum cost, since we need
         // them to lead the iteration in that case
-        bitSetIterators.add((BitSetIterator) iterator);
+        bitSetIterators.add(bitSetIterator);
       } else {
         iterators.add(iterator);
       }
@@ -142,6 +143,11 @@ static DocIdSetIterator createConjunction(
     return disi;
   }
 
+  private static void throwSubIteratorsNotOnSameDocument() {
+    throw new IllegalArgumentException(
+        "Sub-iterators of ConjunctionDISI are not on the same document!");
+  }
+
   final DocIdSetIterator lead1, lead2;
   final DocIdSetIterator[] others;
 
@@ -150,14 +156,7 @@ private ConjunctionDISI(List iterators) {
 
     // Sort the array the first time to allow the least frequent DocsEnum to
     // lead the matching.
-    CollectionUtil.timSort(
-        iterators,
-        new Comparator<DocIdSetIterator>() {
-          @Override
-          public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
-            return Long.compare(o1.cost(), o2.cost());
-          }
-        });
+    CollectionUtil.timSort(iterators, (o1, o2) -> Long.compare(o1.cost(), o2.cost()));
     lead1 = iterators.get(0);
     lead2 = iterators.get(1);
     others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]);
@@ -326,13 +325,7 @@ private ConjunctionTwoPhaseIterator(
       assert twoPhaseIterators.size() > 0;
 
       CollectionUtil.timSort(
-          twoPhaseIterators,
-          new Comparator<TwoPhaseIterator>() {
-            @Override
-            public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
-              return Float.compare(o1.matchCost(), o2.matchCost());
-            }
-          });
+          twoPhaseIterators, (o1, o2) -> Float.compare(o1.matchCost(), o2.matchCost()));
 
       this.twoPhaseIterators =
           twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);