diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java index 89522781fdbd..9ecf748418f6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java @@ -43,8 +43,8 @@ */ public final class Automata { /** - * {@link #makeStringUnion(Collection)} limits terms of this max length to ensure the stack - * doesn't overflow while building, since our algorithm currently relies on recursion. + * {@link #makeStringUnion(Iterable)} limits terms of this max length to ensure the stack doesn't + * overflow while building, since our algorithm currently relies on recursion. */ public static final int MAX_STRING_UNION_TERM_LENGTH = 1000; @@ -576,8 +576,8 @@ public static Automaton makeString(int[] word, int offset, int length) { * @return An {@link Automaton} accepting all input strings. The resulting automaton is codepoint * based (full unicode codepoints on transitions). */ - public static Automaton makeStringUnion(Collection utf8Strings) { - if (utf8Strings.isEmpty()) { + public static Automaton makeStringUnion(Iterable utf8Strings) { + if (utf8Strings.iterator().hasNext() == false) { return makeEmpty(); } else { return StringsToAutomaton.build(utf8Strings, false); @@ -593,8 +593,8 @@ public static Automaton makeStringUnion(Collection utf8Strings) { * @return An {@link Automaton} accepting all input strings. The resulting automaton is binary * based (UTF-8 encoded byte transition labels). */ - public static Automaton makeBinaryStringUnion(Collection utf8Strings) { - if (utf8Strings.isEmpty()) { + public static Automaton makeBinaryStringUnion(Iterable utf8Strings) { + if (utf8Strings.iterator().hasNext() == false) { return makeEmpty(); } else { return StringsToAutomaton.build(utf8Strings, true); diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java index 6c66dc6fd9ed..87b0b6ef8f0f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/StringsToAutomaton.java @@ -18,7 +18,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.Collection; import java.util.HashMap; import java.util.IdentityHashMap; import org.apache.lucene.util.ArrayUtil; @@ -35,8 +34,8 @@ * to directly build a binary {@link Automaton} representation. Users should access this * functionality through {@link Automata} static methods. * - * @see Automata#makeStringUnion(Collection) - * @see Automata#makeBinaryStringUnion(Collection) + * @see Automata#makeStringUnion(Iterable) + * @see Automata#makeBinaryStringUnion(Iterable) * @see Automata#makeStringUnion(BytesRefIterator) * @see Automata#makeBinaryStringUnion(BytesRefIterator) */ @@ -238,7 +237,7 @@ private Automaton completeAndConvert() { * UTF-8 codepoints as transition labels or binary (compiled) transition labels based on {@code * asBinary}. */ - static Automaton build(Collection input, boolean asBinary) { + static Automaton build(Iterable input, boolean asBinary) { final StringsToAutomaton builder = new StringsToAutomaton(); for (BytesRef b : input) { @@ -273,9 +272,11 @@ private void add(BytesRef current, boolean asBinary) { + current); } assert stateRegistry != null : "Automaton already built."; - assert previous == null || previous.get().compareTo(current) <= 0 - : "Input must be in sorted UTF-8 order: " + previous.get() + " >= " + current; - assert setPrevious(current); + if (previous != null && previous.get().compareTo(current) > 0) { + throw new IllegalArgumentException( + "Input must be in sorted UTF-8 order: " + previous.get() + " >= " + current); + } + setPrevious(current); // Reusable codepoint information if we're building a non-binary based automaton UnicodeUtil.UTF8CodePoint codePoint = null;