Skip to content

Commit

Permalink
GH#12321: Marked DaciukMihovAutomatonBuilder as deprecated (#12332)
Browse files Browse the repository at this point in the history
Preparing to reduce visibility of this class in a future release
  • Loading branch information
gsmiller authored May 26, 2023
1 parent 02db735 commit a990c15
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 15 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ API Changes
RuntimePermission "accessDeclaredMembers" is needed in applications using
SecurityManager. (Patrick Zhai, Ben Trent, Uwe Schindler)

* GITHUB#xx: DaciukMihovAutomatonBuilder has been marked deprecated in preparation of reducing its visibility in
a future release. (Greg Miller)

New Features
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;

Expand Down Expand Up @@ -780,7 +780,7 @@ public void testPathsNotLost() throws IOException {
acceptStrings.sort(Comparator.naturalOrder());

acceptStrings = acceptStrings.stream().limit(wordCount).collect(Collectors.toList());
Automaton nonFlattenedAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
Automaton nonFlattenedAutomaton = Automata.makeStringUnion(acceptStrings);

TokenStream ts = AutomatonToTokenStream.toTokenStream(nonFlattenedAutomaton);
TokenStream flattenedTokenStream = new FlattenGraphFilter(ts);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
* @lucene.experimental
*/
public final class Automata {
/**
* {@link #makeStringUnion(Collection)} limits terms of this max length to ensure the stack
* doesn't overflow while building, since our algorithm currently relies on recursion.
*/
public static final int MAX_STRING_UNION_TERM_LENGTH = 1000;

private Automata() {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,19 @@
*
* @see #build(Collection)
* @see Automata#makeStringUnion(Collection)
* @deprecated Visibility of this class will be reduced in a future release. Users can access this
* functionality directly through {@link Automata#makeStringUnion(Collection)}
*/
@Deprecated
public final class DaciukMihovAutomatonBuilder {

/**
* This builder rejects terms that are more than 1k chars long since it then uses recursion based
* on the length of the string, which might cause stack overflows.
*
* @deprecated See {@link Automata#MAX_STRING_UNION_TERM_LENGTH}
*/
public static final int MAX_TERM_LENGTH = 1_000;
@Deprecated public static final int MAX_TERM_LENGTH = 1_000;

/** The default constructor is private. Use static methods directly. */
private DaciukMihovAutomatonBuilder() {
Expand Down Expand Up @@ -193,7 +198,7 @@ private static boolean referenceEquals(Object[] a1, Object[] a2) {
* or equal compared to any previous sequences added to this automaton (the input must be sorted).
*/
private void add(CharsRef current) {
if (current.length > MAX_TERM_LENGTH) {
if (current.length > Automata.MAX_STRING_UNION_TERM_LENGTH) {
throw new IllegalArgumentException(
"This builder doesn't allow terms that are larger than 1,000 characters, got " + current);
}
Expand Down Expand Up @@ -266,7 +271,10 @@ private static int convert(
/**
* Build a minimal, deterministic automaton from a sorted list of {@link BytesRef} representing
* strings in UTF-8. These strings must be binary-sorted.
*
* @deprecated Please see {@link Automata#makeStringUnion(Collection)} instead
*/
@Deprecated
public static Automaton build(Collection<BytesRef> input) {
final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@
import java.util.List;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;

public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase {

public void testSinglePath() throws IOException {
List<BytesRef> acceptStrings = new ArrayList<>();
acceptStrings.add(new BytesRef("abc"));

Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
assertTokenStreamContents(
ts,
Expand All @@ -48,7 +48,7 @@ public void testParallelPaths() throws IOException {
acceptStrings.add(new BytesRef("123"));
acceptStrings.add(new BytesRef("abc"));

Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
assertTokenStreamContents(
ts,
Expand All @@ -65,7 +65,7 @@ public void testForkedPath() throws IOException {
acceptStrings.add(new BytesRef("ab3"));
acceptStrings.add(new BytesRef("abc"));

Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
assertTokenStreamContents(
ts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ private CompiledAutomaton build(int determinizeWorkLimit, String... strings) {
terms.add(new BytesRef(s));
}
Collections.sort(terms);
final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
final Automaton a = Automata.makeStringUnion(terms);
return new CompiledAutomaton(a, true, false, determinizeWorkLimit, false);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
import org.apache.lucene.util.automaton.Automata;

/**
* Uses an {@link Analyzer} on content to get offsets and then populates a {@link MemoryIndex}.
Expand Down Expand Up @@ -67,7 +67,7 @@ private static CharArrayMatcher buildCombinedAutomaton(UHComponents components)
// to build an automaton on them
List<BytesRef> filteredTerms =
Arrays.stream(components.getTerms())
.filter(b -> b.length < DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH)
.filter(b -> b.length < Automata.MAX_STRING_UNION_TERM_LENGTH)
.collect(Collectors.toList());
allAutomata.add(CharArrayMatcher.fromTerms(filteredTerms));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
import org.apache.lucene.util.automaton.Automata;
import org.junit.After;
import org.junit.Before;

Expand Down Expand Up @@ -1671,12 +1671,11 @@ public void testQueryWithLongTerm() throws IOException {
Query query =
new BooleanQuery.Builder()
.add(
new TermQuery(
new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH))),
new TermQuery(new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH))),
BooleanClause.Occur.SHOULD)
.add(
new TermQuery(
new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH + 1))),
new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH + 1))),
BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("title", "title")), BooleanClause.Occur.SHOULD)
.build();
Expand Down

0 comments on commit a990c15

Please sign in to comment.