From 894e33f70505f05e7fc467fbceddac2dcf27d99c Mon Sep 17 00:00:00 2001
From: Greg Miller <gsmiller@gmail.com>
Date: Thu, 25 May 2023 10:35:11 -0700
Subject: [PATCH] GH#12321: Marked DaciukMihovAutomatonBuilder as deprecated

Preparing to reduce visibility of this class in a future release
---
 lucene/CHANGES.txt                                   |  3 +++
 .../lucene/analysis/core/TestFlattenGraphFilter.java |  4 ++--
 .../org/apache/lucene/util/automaton/Automata.java   |  5 +++++
 .../util/automaton/DaciukMihovAutomatonBuilder.java  | 12 ++++++++++--
 .../lucene/analysis/TestAutomatonToTokenStream.java  |  8 ++++----
 .../lucene/util/automaton/TestCompiledAutomaton.java |  2 +-
 .../search/uhighlight/MemoryIndexOffsetStrategy.java |  4 ++--
 .../search/uhighlight/TestUnifiedHighlighter.java    |  7 +++----
 8 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 89d645fa9bf9..3a1bcd63a2ee 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -15,6 +15,9 @@ API Changes
   RuntimePermission "accessDeclaredMembers" is needed in applications using
   SecurityManager.  (Patrick Zhai, Ben Trent, Uwe Schindler)
 
+* GITHUB#xx: DaciukMihovAutomatonBuilder has been marked deprecated in preparation of reducing its visibility in
+  a future release. (Greg Miller)
+
 New Features
 ---------------------
 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
index 7b35f56016cb..7fa901ac5bf4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFlattenGraphFilter.java
@@ -40,8 +40,8 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.Transition;
 
@@ -780,7 +780,7 @@ public void testPathsNotLost() throws IOException {
     acceptStrings.sort(Comparator.naturalOrder());
 
     acceptStrings = acceptStrings.stream().limit(wordCount).collect(Collectors.toList());
-    Automaton nonFlattenedAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+    Automaton nonFlattenedAutomaton = Automata.makeStringUnion(acceptStrings);
 
     TokenStream ts = AutomatonToTokenStream.toTokenStream(nonFlattenedAutomaton);
     TokenStream flattenedTokenStream = new FlattenGraphFilter(ts);
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
index 9a642338f09b..def98684d07b 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
@@ -40,6 +40,11 @@
  * @lucene.experimental
  */
 public final class Automata {
+  /**
+   * {@link #makeStringUnion(Collection)} limits terms of this max length to ensure the stack
+   * doesn't overflow while building, since our algorithm currently relies on recursion.
+   */
+  public static final int MAX_STRING_UNION_TERM_LENGTH = 1000;
 
   private Automata() {}
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
index 2fe13101168e..a0ff7d3f13a0 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
@@ -32,14 +32,19 @@
  *
  * @see #build(Collection)
  * @see Automata#makeStringUnion(Collection)
+ * @deprecated Visibility of this class will be reduced in a future release. Users can access this
+ *     functionality directly through {@link Automata#makeStringUnion(Collection)}
  */
+@Deprecated
 public final class DaciukMihovAutomatonBuilder {
 
   /**
    * This builder rejects terms that are more than 1k chars long since it then uses recursion based
    * on the length of the string, which might cause stack overflows.
+   *
+   * @deprecated See {@link Automata#MAX_STRING_UNION_TERM_LENGTH}
    */
-  public static final int MAX_TERM_LENGTH = 1_000;
+  @Deprecated public static final int MAX_TERM_LENGTH = 1_000;
 
   /** The default constructor is private. Use static methods directly. */
   private DaciukMihovAutomatonBuilder() {
@@ -193,7 +198,7 @@ private static boolean referenceEquals(Object[] a1, Object[] a2) {
    * or equal compared to any previous sequences added to this automaton (the input must be sorted).
    */
   private void add(CharsRef current) {
-    if (current.length > MAX_TERM_LENGTH) {
+    if (current.length > Automata.MAX_STRING_UNION_TERM_LENGTH) {
       throw new IllegalArgumentException(
           "This builder doesn't allow terms that are larger than 1,000 characters, got " + current);
     }
@@ -266,7 +271,10 @@ private static int convert(
   /**
    * Build a minimal, deterministic automaton from a sorted list of {@link BytesRef} representing
    * strings in UTF-8. These strings must be binary-sorted.
+   *
+   * @deprecated Please see {@link Automata#makeStringUnion(Collection)} instead
    */
+  @Deprecated
   public static Automaton build(Collection<BytesRef> input) {
     final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder();
 
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
index 27d2c72ddb09..d558ad6d3697 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestAutomatonToTokenStream.java
@@ -22,8 +22,8 @@
 import java.util.List;
 import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
 
 public class TestAutomatonToTokenStream extends BaseTokenStreamTestCase {
 
@@ -31,7 +31,7 @@ public void testSinglePath() throws IOException {
     List<BytesRef> acceptStrings = new ArrayList<>();
     acceptStrings.add(new BytesRef("abc"));
 
-    Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+    Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
     TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
     assertTokenStreamContents(
         ts,
@@ -48,7 +48,7 @@ public void testParallelPaths() throws IOException {
     acceptStrings.add(new BytesRef("123"));
     acceptStrings.add(new BytesRef("abc"));
 
-    Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+    Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
     TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
     assertTokenStreamContents(
         ts,
@@ -65,7 +65,7 @@ public void testForkedPath() throws IOException {
     acceptStrings.add(new BytesRef("ab3"));
     acceptStrings.add(new BytesRef("abc"));
 
-    Automaton flatPathAutomaton = DaciukMihovAutomatonBuilder.build(acceptStrings);
+    Automaton flatPathAutomaton = Automata.makeStringUnion(acceptStrings);
     TokenStream ts = AutomatonToTokenStream.toTokenStream(flatPathAutomaton);
     assertTokenStreamContents(
         ts,
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
index 5d63b8051cd0..4640bea2dc2c 100644
--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestCompiledAutomaton.java
@@ -35,7 +35,7 @@ private CompiledAutomaton build(int determinizeWorkLimit, String... strings) {
       terms.add(new BytesRef(s));
     }
     Collections.sort(terms);
-    final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
+    final Automaton a = Automata.makeStringUnion(terms);
     return new CompiledAutomaton(a, true, false, determinizeWorkLimit, false);
   }
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
index aad68190c644..851c4639df80 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@@ -30,7 +30,7 @@
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.queries.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.lucene.util.automaton.Automata;
 
 /**
  * Uses an {@link Analyzer} on content to get offsets and then populates a {@link MemoryIndex}.
@@ -67,7 +67,7 @@ private static CharArrayMatcher buildCombinedAutomaton(UHComponents components)
       // to build an automaton on them
       List<BytesRef> filteredTerms =
           Arrays.stream(components.getTerms())
-              .filter(b -> b.length < DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH)
+              .filter(b -> b.length < Automata.MAX_STRING_UNION_TERM_LENGTH)
               .collect(Collectors.toList());
       allAutomata.add(CharArrayMatcher.fromTerms(filteredTerms));
     }
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 6cc524a9fac4..54792a569f76 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -58,7 +58,7 @@
 import org.apache.lucene.tests.analysis.MockTokenizer;
 import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.lucene.util.automaton.Automata;
 import org.junit.After;
 import org.junit.Before;
 
@@ -1671,12 +1671,11 @@ public void testQueryWithLongTerm() throws IOException {
     Query query =
         new BooleanQuery.Builder()
             .add(
-                new TermQuery(
-                    new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH))),
+                new TermQuery(new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH))),
                 BooleanClause.Occur.SHOULD)
             .add(
                 new TermQuery(
-                    new Term("title", "a".repeat(DaciukMihovAutomatonBuilder.MAX_TERM_LENGTH + 1))),
+                    new Term("title", "a".repeat(Automata.MAX_STRING_UNION_TERM_LENGTH + 1))),
                 BooleanClause.Occur.SHOULD)
             .add(new TermQuery(new Term("title", "title")), BooleanClause.Occur.SHOULD)
             .build();