Merge branch 'main' into readonly-monitor

* main:
  LUCENE-10473: Make tests a bit faster when running nightly. (apache#754)
  LUCENE-9905: Fix check in TestPerFieldKnnVectorsFormat#testMergeUsesNewFormat
  LUCENE-9614: Fix rare TestKnnVectorQuery failures
  LUCENE-10472: Fix TestMatchAllDocsQuery#testEarlyTermination (apache#753)
  LUCENE-10418: Move CHANGES to the correct section.
  LUCENE-10418: Optimize `Query#rewrite` in the non-scoring case. (apache#672)
  LUCENE-10469: Fix score mode propagation in ConstantScoreQuery. (apache#750)
  LUCENE-10452, LUCENE-10451: mention hunspell changes in CHANGES.txt
  LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead (apache#723)
  Add 9.2.0 section to release notes
  LUCENE-10451 Hunspell: don't perform potentially expensive spellchecking after timeout (apache#721)
  LUCENE-10463: increment java version to 17 in smoke tester (apache#748)
sourcesense · Mar 21, 2022 · 0c1d549 · 0c1d549
2 parents de7fa35 + f239c0e
commit 0c1d549
Show file tree

Hide file tree

Showing 24 changed files with 545 additions and 263 deletions.
diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py
@@ -143,10 +143,10 @@ def checkJARMetaData(desc, jarFile, gitRevision, version):
       'Implementation-Vendor: The Apache Software Foundation',
       'Specification-Title: Lucene Search Engine:',
       'Implementation-Title: org.apache.lucene',
-      'X-Compile-Source-JDK: 11',
-      'X-Compile-Target-JDK: 11',
+      'X-Compile-Source-JDK: 17',
+      'X-Compile-Target-JDK: 17',
       'Specification-Version: %s' % version,
-      'X-Build-JDK: 11.',
+      'X-Build-JDK: 17.',
       'Extension-Name: org.apache.lucene'):
       if type(verify) is not tuple:
         verify = (verify,)
@@ -610,20 +610,20 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
 
     validateCmd = './gradlew --no-daemon check -p lucene/documentation'
     print('    run "%s"' % validateCmd)
-    java.run_java11(validateCmd, '%s/validate.log' % unpackPath)
+    java.run_java17(validateCmd, '%s/validate.log' % unpackPath)
 
-    print("    run tests w/ Java 11 and testArgs='%s'..." % testArgs)
-    java.run_java11('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-    print("    compile jars w/ Java 11")
-    java.run_java11('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-    testDemo(java.run_java11, isSrc, version, '11')
+    print("    run tests w/ Java 17 and testArgs='%s'..." % testArgs)
+    java.run_java17('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+    print("    compile jars w/ Java 17")
+    java.run_java17('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+    testDemo(java.run_java17, isSrc, version, '17')
 
-    if java.run_java17:
-      print("    run tests w/ Java 17 and testArgs='%s'..." % testArgs)
-      java.run_java17('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
-      print("    compile jars w/ Java 17")
-      java.run_java17('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
-      testDemo(java.run_java17, isSrc, version, '17')
+    if java.run_java18:
+      print("    run tests w/ Java 18 and testArgs='%s'..." % testArgs)
+      java.run_java18('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
+      print("    compile jars w/ Java 18")
+      java.run_java18('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
+      testDemo(java.run_java18, isSrc, version, '18')
 
     print('  confirm all releases have coverage in TestBackwardsCompatibility')
     confirmAllReleasesAreTestedForBackCompat(version, unpackPath)
@@ -632,9 +632,9 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):
 
     checkAllJARs(os.getcwd(), gitRevision, version)
 
-    testDemo(java.run_java11, isSrc, version, '11')
-    if java.run_java17:
-      testDemo(java.run_java17, isSrc, version, '17')
+    testDemo(java.run_java17, isSrc, version, '17')
+    if java.run_java18:
+      testDemo(java.run_java18, isSrc, version, '18')
 
   testChangesText('.', version)
 
@@ -910,7 +910,7 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
         sys.stdout.write('.')
 
 
-def make_java_config(parser, java17_home):
+def make_java_config(parser, java18_home):
   def _make_runner(java_home, version):
     print('Java %s JAVA_HOME=%s' % (version, java_home))
     if cygwin:
@@ -924,16 +924,16 @@ def _make_runner(java_home, version):
     def run_java(cmd, logfile):
       run('%s; %s' % (cmd_prefix, cmd), logfile)
     return run_java
-  java11_home =  os.environ.get('JAVA_HOME')
-  if java11_home is None:
+  java17_home =  os.environ.get('JAVA_HOME')
+  if java17_home is None:
     parser.error('JAVA_HOME must be set')
-  run_java11 = _make_runner(java11_home, '11')
-  run_java17 = None
-  if java17_home is not None:
-    run_java17 = _make_runner(java17_home, '17')
+  run_java17 = _make_runner(java17_home, '17')
+  run_java18 = None
+  if java18_home is not None:
+    run_java18 = _make_runner(java18_home, '18')
 
-  jc = namedtuple('JavaConfig', 'run_java11 java11_home run_java17 java17_home')
-  return jc(run_java11, java11_home, run_java17, java17_home)
+  jc = namedtuple('JavaConfig', 'run_java17 java17_home run_java18 java18_home')
+  return jc(run_java17, java17_home, run_java18, java18_home)
 
 version_re = re.compile(r'(\d+\.\d+\.\d+(-ALPHA|-BETA)?)')
 revision_re = re.compile(r'rev-([a-f\d]+)')
@@ -955,8 +955,8 @@ def parse_config():
                       help='GIT revision number that release was built with, defaults to that in URL')
   parser.add_argument('--version', metavar='X.Y.Z(-ALPHA|-BETA)?',
                       help='Version of the release, defaults to that in URL')
-  parser.add_argument('--test-java17', metavar='java17_home',
-                      help='Path to Java17 home directory, to run tests with if specified')
+  parser.add_argument('--test-java18', metavar='java18_home',
+                      help='Path to Java home directory, to run tests with if specified')
   parser.add_argument('--download-only', action='store_true', default=False,
                       help='Only perform download and sha hash check steps')
   parser.add_argument('url', help='Url pointing to release to test')
@@ -983,7 +983,7 @@ def parse_config():
   if c.local_keys is not None and not os.path.exists(c.local_keys):
     parser.error('Local KEYS file "%s" not found' % c.local_keys)
 
-  c.java = make_java_config(parser, c.test_java17)
+  c.java = make_java_config(parser, c.test_java18)
 
   if c.tmp_dir:
     c.tmp_dir = os.path.abspath(c.tmp_dir)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -49,6 +49,36 @@ Other
 * LUCENE-10253: The @BadApple annotation has been removed from the test
   framework. (Adrien Grand)
 
+======================= Lucene 9.2.0 =======================
+API Changes
+---------------------
+(No changes)
+
+New Features
+---------------------
+(No changes)
+
+Improvements
+---------------------
+(No changes)
+
+Optimizations
+---------------------
+* LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead (Peter Gromov)
+
+* LUCENE-10451: Hunspell: don't perform potentially expensive spellchecking after timeout (Peter Gromov)
+
+* LUCENE-10418: More `Query#rewrite` optimizations for the non-scoring case.
+  (Adrien Grand)
+
+Bug Fixes
+---------------------
+(No changes)
+
+Other
+---------------------
+(No changes)
+
 ======================= Lucene 9.1.0 =======================
 
 API Changes
@@ -315,6 +345,8 @@ Bug Fixes
   infinite loops in their parent BooleanQuery.
   (Ankit Jain, Daniel Doubrovkine, Adrien Grand)
 
+* LUCENE-10469: Fix ScoreMode propagation by ConstantScoreQuery. (Adrien Grand)
+
 Other
 ---------------------
 

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/GeneratingSuggester.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/GeneratingSuggester.java
@@ -52,7 +52,7 @@ class GeneratingSuggester {
     this.speller = speller;
   }
 
-  List<String> suggest(String word, WordCase originalCase, Set<String> prevSuggestions) {
+  List<String> suggest(String word, WordCase originalCase, Set<Suggestion> prevSuggestions) {
     List<Weighted<Root<String>>> roots = findSimilarDictionaryEntries(word, originalCase);
     List<Weighted<String>> expanded = expandRoots(word, roots);
     TreeSet<Weighted<String>> bySimilarity = rankBySimilarity(word, expanded);
@@ -77,8 +77,6 @@ char transformChar(char c) {
         Math.max(1, word.length() - 4),
         word.length() + 4,
         (rootChars, forms) -> {
-          speller.checkCanceled.run();
-
           assert rootChars.length > 0;
           if (Math.abs(rootChars.length - word.length()) > MAX_ROOT_LENGTH_DIFF) {
             assert rootChars.length < word.length(); // processAllWords takes care of longer keys
@@ -105,6 +103,8 @@ char transformChar(char c) {
             return;
           }
 
+          speller.checkCanceled.run();
+
           String root = rootChars.toString();
           do {
             roots.add(new Weighted<>(new Root<>(root, forms.ints[forms.offset + suitable]), sc));
@@ -331,7 +331,7 @@ private TreeSet<Weighted<String>> rankBySimilarity(String word, List<Weighted<St
   }
 
   private List<String> getMostRelevantSuggestions(
-      TreeSet<Weighted<String>> bySimilarity, Set<String> prevSuggestions) {
+      TreeSet<Weighted<String>> bySimilarity, Set<Suggestion> prevSuggestions) {
     List<String> result = new ArrayList<>();
     boolean hasExcellent = false;
     for (Weighted<String> weighted : bySimilarity) {
@@ -347,7 +347,7 @@ private List<String> getMostRelevantSuggestions(
         break;
       }
 
-      if (prevSuggestions.stream().noneMatch(weighted.word::contains)
+      if (prevSuggestions.stream().noneMatch(s -> weighted.word.contains(s.raw))
           && result.stream().noneMatch(weighted.word::contains)
           && speller.checkWord(weighted.word)) {
         result.add(weighted.word);

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java
@@ -17,19 +17,20 @@
 package org.apache.lucene.analysis.hunspell;
 
 import static org.apache.lucene.analysis.hunspell.Dictionary.FLAG_UNSET;
-import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.*;
+import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.NO_TIMEOUT;
+import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.RETURN_PARTIAL_RESULT;
 import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_BEGIN;
 import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_END;
 import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_MIDDLE;
 import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_RULE_END;
 import static org.apache.lucene.analysis.hunspell.WordContext.SIMPLE_WORD;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Locale;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
@@ -399,9 +400,10 @@ private boolean checkCompoundRules(
       char[] wordChars, int offset, int length, List<IntsRef> words) {
     if (words.size() >= 100) return false;
 
+    checkCanceled.run();
+
     int limit = length - dictionary.compoundMin + 1;
     for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
-      checkCanceled.run();
       IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos);
       if (forms != null) {
         words.add(forms);
@@ -543,25 +545,25 @@ public List<String> suggest(String word, long timeLimitMs) throws SuggestionTime
       }
     }
 
-    LinkedHashSet<String> suggestions = new LinkedHashSet<>();
+    LinkedHashSet<Suggestion> suggestions = new LinkedHashSet<>();
     Runnable checkCanceled =
-        policy == NO_TIMEOUT
-            ? this.checkCanceled
-            : checkTimeLimit(word, wordCase, suggestions, timeLimitMs);
+        policy == NO_TIMEOUT ? this.checkCanceled : checkTimeLimit(word, suggestions, timeLimitMs);
     try {
       doSuggest(word, wordCase, suggestions, checkCanceled);
     } catch (SuggestionTimeoutException e) {
-      if (policy == RETURN_PARTIAL_RESULT) {
-        return postprocess(word, wordCase, suggestions);
+      if (policy != RETURN_PARTIAL_RESULT) {
+        throw e;
       }
-      throw e;
     }
 
-    return postprocess(word, wordCase, suggestions);
+    return postprocess(suggestions);
   }
 
   private void doSuggest(
-      String word, WordCase wordCase, LinkedHashSet<String> suggestions, Runnable checkCanceled) {
+      String word,
+      WordCase wordCase,
+      LinkedHashSet<Suggestion> suggestions,
+      Runnable checkCanceled) {
     Hunspell suggestionSpeller =
         new Hunspell(dictionary, policy, checkCanceled) {
           @Override
@@ -570,22 +572,26 @@ boolean acceptsStem(int formID) {
                 && !dictionary.hasFlag(formID, dictionary.subStandard);
           }
         };
-    ModifyingSuggester modifier = new ModifyingSuggester(suggestionSpeller, suggestions);
-    boolean hasGoodSuggestions = modifier.suggest(word, wordCase);
+    boolean hasGoodSuggestions =
+        new ModifyingSuggester(suggestionSpeller, suggestions, word, wordCase).suggest();
 
     if (!hasGoodSuggestions && dictionary.maxNGramSuggestions > 0) {
-      suggestions.addAll(
+      List<String> generated =
           new GeneratingSuggester(suggestionSpeller)
-              .suggest(dictionary.toLowerCase(word), wordCase, suggestions));
+              .suggest(dictionary.toLowerCase(word), wordCase, suggestions);
+      for (String raw : generated) {
+        suggestions.add(new Suggestion(raw, word, wordCase, suggestionSpeller));
+      }
     }
 
-    if (word.contains("-") && suggestions.stream().noneMatch(s -> s.contains("-"))) {
-      suggestions.addAll(modifyChunksBetweenDashes(word));
+    if (word.contains("-") && suggestions.stream().noneMatch(s -> s.raw.contains("-"))) {
+      for (String raw : modifyChunksBetweenDashes(word)) {
+        suggestions.add(new Suggestion(raw, word, wordCase, suggestionSpeller));
+      }
     }
   }
 
-  private Runnable checkTimeLimit(
-      String word, WordCase wordCase, Set<String> suggestions, long timeLimitMs) {
+  private Runnable checkTimeLimit(String word, Set<Suggestion> suggestions, long timeLimitMs) {
     return new Runnable() {
       final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeLimitMs);
       int invocationCounter = 100;
@@ -603,38 +609,15 @@ public void run() {
 
       private void stop() {
         List<String> partialResult =
-            policy == RETURN_PARTIAL_RESULT ? null : postprocess(word, wordCase, suggestions);
+            policy == RETURN_PARTIAL_RESULT ? null : postprocess(suggestions);
         String message = "Time limit of " + timeLimitMs + "ms exceeded for " + word;
         throw new SuggestionTimeoutException(message, partialResult);
       }
     };
   }
 
-  private List<String> postprocess(String word, WordCase wordCase, Collection<String> suggestions) {
-    Set<String> result = new LinkedHashSet<>();
-    for (String candidate : suggestions) {
-      result.add(adjustSuggestionCase(candidate, wordCase, word));
-      if (wordCase == WordCase.UPPER && dictionary.checkSharpS && candidate.contains("ß")) {
-        result.add(candidate);
-      }
-    }
-    return result.stream().map(this::cleanOutput).collect(Collectors.toList());
-  }
-
-  private String adjustSuggestionCase(String candidate, WordCase originalCase, String original) {
-    if (originalCase == WordCase.UPPER) {
-      String upper = candidate.toUpperCase(Locale.ROOT);
-      if (upper.contains(" ") || spell(upper)) {
-        return upper;
-      }
-    }
-    if (Character.isUpperCase(original.charAt(0))) {
-      String title = Character.toUpperCase(candidate.charAt(0)) + candidate.substring(1);
-      if (title.contains(" ") || spell(title)) {
-        return title;
-      }
-    }
-    return candidate;
+  private List<String> postprocess(Collection<Suggestion> suggestions) {
+    return suggestions.stream().flatMap(s -> Arrays.stream(s.result)).distinct().toList();
   }
 
   private List<String> modifyChunksBetweenDashes(String word) {
@@ -662,12 +645,4 @@ private List<String> modifyChunksBetweenDashes(String word) {
     }
     return result;
   }
-
-  private String cleanOutput(String s) {
-    if (dictionary.oconv == null) return s;
-
-    StringBuilder sb = new StringBuilder(s);
-    dictionary.oconv.applyMappings(sb);
-    return sb.toString();
-  }
 }