From 894e33f70505f05e7fc467fbceddac2dcf27d99c Mon Sep 17 00:00:00 2001 From: Greg Miller Date: Thu, 25 May 2023 10:35:11 -0700 Subject: [PATCH] GH#12321: Marked DaciukMihovAutomatonBuilder as deprecated Preparing to reduce visibility of this class in a future release --- lucene/CHANGES.txt | 3 +++ .../lucene/analysis/core/TestFlattenGraphFilter.java | 4 ++-- .../org/apache/lucene/util/automaton/Automata.java | 5 +++++ .../util/automaton/DaciukMihovAutomatonBuilder.java | 12 ++++++++++-- .../lucene/analysis/TestAutomatonToTokenStream.java | 8 ++++---- .../lucene/util/automaton/TestCompiledAutomaton.java | 2 +- .../search/uhighlight/MemoryIndexOffsetStrategy.java | 4 ++-- .../search/uhighlight/TestUnifiedHighlighter.java | 7 +++---- 8 files changed, 30 insertions(+), 15 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 89d645fa9bf9..3a1bcd63a2ee 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -15,6 +15,9 @@ API Changes RuntimePermission "accessDeclaredMembers" is needed in applications using SecurityManager. (Patrick Zhai, Ben Trent, Uwe Schindler) +* GITHUB#xx: DaciukMihovAutomatonBuilder has been marked deprecated in preparation of reducing its visibility in + a future release. (Greg Miller) + New Features --------------------- diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java index 7b35f56016cb..7fa901ac5bf4 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java @@ -40,8 +40,8 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder; import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Transition; @@ -780,7 +780,7 @@ public void testPathsNotLost() throws IOException { acceptStrings.sort(Comparator.naturalOrder()); acceptStrings = acceptStrings.stream().limit(wordCount).collect(Collectors.toList()); - Automaton nonFlattenedAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings); + Automaton nonFlattenedAutomaton = Automata.makeStringUnion(acceptStrings); TokenStream ts = AutomatonToTokenStream.toTokenStream(nonFlattenedAutomaton); TokenStream flattenedTokenStream = new FlattenGraphFilter(ts); diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java index 9a642338f09b..def98684d07b 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java @@ -40,6 +40,11 @@ * @lucene.experimental */ public final class Automata { + /** + * {@link #makeStringUnion(Collection)} limits terms of this max length to ensure the stack + * doesn't overflow while building, since our algorithm currently relies on recursion. + */ + public static final int MAX_STRING_UNION_TERM_LENGTH = 1000; private Automata() {} diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java index 2fe13101168e..a0ff7d3f13a0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java @@ -32,14 +32,19 @@ * * @see #build(Collection) * @see Automata#makeStringUnion(Collection) + * @deprecated Visibility of this class will be reduced in a future release. Users can access this + * functionality directly through {@link Automata#makeStringUnion(Collection)} */ +@Deprecated public final class DaciukMihovAutomatonBuilder { /** * This builder rejects terms that are more than 1k chars long since it then uses recursion based * on the length of the string, which might cause stack overflows. + * + * @deprecated See {@link Automata#MAX_STRING_UNION_TERM_LENGTH} */ - public static final int MAX_TERM_LENGTH = 1_000; + @Deprecated public static final int MAX_TERM_LENGTH = 1_000; /** The default constructor is private. Use static methods directly. */ private DaciukMihovAutomatonBuilder() { @@ -193,7 +198,7 @@ private static boolean referenceEquals(Object[] a1, Object[] a2) { * or equal compared to any previous sequences added to this automaton (the input must be sorted). */ private void add(CharsRef current) { - if (current.length > MAX_TERM_LENGTH) { + if (current.length > Automata.MAX_STRING_UNION_TERM_LENGTH) { throw new IllegalArgumentException( "This builder doesn't allow terms that are larger than 1,000 characters, got " + current); } @@ -266,7 +271,10 @@ private static int convert( /** * Build a minimal, deterministic automaton from a sorted list of {@link BytesRef} representing * strings in UTF-8. These strings must be binary-sorted. + * + * @deprecated Please see {@link Automata#makeStringUnion(Collection)} instead */ + @Deprecated public static Automaton build(Collection input) { final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder(); diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java index 27d2c72ddb09..d558ad6d3697 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java @@ -22,8 +22,8 @@ import java.util.List; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder; public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase { @@ -31,7 +31,7 @@ public void testSinglePath() throws IOException { List acceptStrings = new ArrayList<>(); acceptStrings.add(new BytesRef("abc")); - Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings); + Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings); TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton); assertTokenStreamContents( ts, @@ -48,7 +48,7 @@ public void testParallelPaths() throws IOException { acceptStrings.add(new BytesRef("123")); acceptStrings.add(new BytesRef("abc")); - Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings); + Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings); TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton); assertTokenStreamContents( ts, @@ -65,7 +65,7 @@ public void testForkedPath() throws IOException { acceptStrings.add(new BytesRef("ab3")); acceptStrings.add(new BytesRef("abc")); - Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings); + Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings); TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton); assertTokenStreamContents( ts, diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java index 5d63b8051cd0..4640bea2dc2c 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java @@ -35,7 +35,7 @@ private CompiledAutomaton build(int determinizeWorkLimit, String... strings) { terms.add(new BytesRef(s)); } Collections.sort(terms); - final Automaton a = DaciukMihovAutomatonBuilder.build(terms); + final Automaton a = Automata.makeStringUnion(terms); return new CompiledAutomaton(a, true, false, determinizeWorkLimit, false); } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java index aad68190c644..851c4639df80 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java @@ -30,7 +30,7 @@ import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.queries.spans.SpanQuery; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder; +import org.apache.lucene.util.automaton.Automata; /** * Uses an {@link Analyzer} on content to get offsets and then populates a {@link MemoryIndex}. @@ -67,7 +67,7 @@ private static CharArrayMatcher buildCombinedAutomaton(UHComponents components) // to build an automaton on them List filteredTerms = Arrays.stream(components.getTerms()) - .filter(b -> b.length < DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH) + .filter(b -> b.length < Automata.MAX_STRING_UNION_TERM_LENGTH) .collect(Collectors.toList()); allAutomata.add(CharArrayMatcher.fromTerms(filteredTerms)); } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java index 6cc524a9fac4..54792a569f76 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java @@ -58,7 +58,7 @@ import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; -import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder; +import org.apache.lucene.util.automaton.Automata; import org.junit.After; import org.junit.Before; @@ -1671,12 +1671,11 @@ public void testQueryWithLongTerm() throws IOException { Query query = new BooleanQuery.Builder() .add( - new TermQuery( - new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH))), + new TermQuery(new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH))), BooleanClause.Occur.SHOULD) .add( new TermQuery( - new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH + 1))), + new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH + 1))), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("title", "title")), BooleanClause.Occur.SHOULD) .build();