Skip to content

Commit

Permalink
Merge branch 'main' into readonly-monitor
Browse files Browse the repository at this point in the history
* main:
  LUCENE-10473: Make tests a bit faster when running nightly. (apache#754)
  LUCENE-9905: Fix check in TestPerFieldKnnVectorsFormat#testMergeUsesNewFormat
  LUCENE-9614: Fix rare TestKnnVectorQuery failures
  LUCENE-10472: Fix TestMatchAllDocsQuery#testEarlyTermination (apache#753)
  LUCENE-10418: Move CHANGES to the correct section.
  LUCENE-10418: Optimize `Query#rewrite` in the non-scoring case. (apache#672)
  LUCENE-10469: Fix score mode propagation in ConstantScoreQuery. (apache#750)
  LUCENE-10452, LUCENE-10451: mention hunspell changes in CHANGES.txt
  LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead (apache#723)
  Add 9.2.0 section to release notes
  LUCENE-10451 Hunspell: don't perform potentially expensive spellchecking after timeout (apache#721)
  LUCENE-10463: increment java version to 17 in smoke tester (apache#748)
  • Loading branch information
niko.usai committed Mar 21, 2022
2 parents de7fa35 + f239c0e commit 0c1d549
Show file tree
Hide file tree
Showing 24 changed files with 545 additions and 263 deletions.
60 changes: 30 additions & 30 deletions dev-tools/scripts/smokeTestRelease.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,10 @@ def checkJARMetaData(desc, jarFile, gitRevision, version):
'Implementation-Vendor: The Apache Software Foundation',
'Specification-Title: Lucene Search Engine:',
'Implementation-Title: org.apache.lucene',
'X-Compile-Source-JDK: 11',
'X-Compile-Target-JDK: 11',
'X-Compile-Source-JDK: 17',
'X-Compile-Target-JDK: 17',
'Specification-Version: %s' % version,
'X-Build-JDK: 11.',
'X-Build-JDK: 17.',
'Extension-Name: org.apache.lucene'):
if type(verify) is not tuple:
verify = (verify,)
Expand Down Expand Up @@ -610,20 +610,20 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):

validateCmd = './gradlew --no-daemon check -p lucene/documentation'
print(' run "%s"' % validateCmd)
java.run_java11(validateCmd, '%s/validate.log' % unpackPath)
java.run_java17(validateCmd, '%s/validate.log' % unpackPath)

print(" run tests w/ Java 11 and testArgs='%s'..." % testArgs)
java.run_java11('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
print(" compile jars w/ Java 11")
java.run_java11('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
testDemo(java.run_java11, isSrc, version, '11')
print(" run tests w/ Java 17 and testArgs='%s'..." % testArgs)
java.run_java17('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
print(" compile jars w/ Java 17")
java.run_java17('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
testDemo(java.run_java17, isSrc, version, '17')

if java.run_java17:
print(" run tests w/ Java 17 and testArgs='%s'..." % testArgs)
java.run_java17('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
print(" compile jars w/ Java 17")
java.run_java17('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
testDemo(java.run_java17, isSrc, version, '17')
if java.run_java18:
print(" run tests w/ Java 18 and testArgs='%s'..." % testArgs)
java.run_java18('./gradlew --no-daemon test %s' % testArgs, '%s/test.log' % unpackPath)
print(" compile jars w/ Java 18")
java.run_java18('./gradlew --no-daemon jar -Dversion.release=%s' % version, '%s/compile.log' % unpackPath)
testDemo(java.run_java18, isSrc, version, '18')

print(' confirm all releases have coverage in TestBackwardsCompatibility')
confirmAllReleasesAreTestedForBackCompat(version, unpackPath)
Expand All @@ -632,9 +632,9 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs):

checkAllJARs(os.getcwd(), gitRevision, version)

testDemo(java.run_java11, isSrc, version, '11')
if java.run_java17:
testDemo(java.run_java17, isSrc, version, '17')
testDemo(java.run_java17, isSrc, version, '17')
if java.run_java18:
testDemo(java.run_java18, isSrc, version, '18')

testChangesText('.', version)

Expand Down Expand Up @@ -910,7 +910,7 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
sys.stdout.write('.')


def make_java_config(parser, java17_home):
def make_java_config(parser, java18_home):
def _make_runner(java_home, version):
print('Java %s JAVA_HOME=%s' % (version, java_home))
if cygwin:
Expand All @@ -924,16 +924,16 @@ def _make_runner(java_home, version):
def run_java(cmd, logfile):
run('%s; %s' % (cmd_prefix, cmd), logfile)
return run_java
java11_home = os.environ.get('JAVA_HOME')
if java11_home is None:
java17_home = os.environ.get('JAVA_HOME')
if java17_home is None:
parser.error('JAVA_HOME must be set')
run_java11 = _make_runner(java11_home, '11')
run_java17 = None
if java17_home is not None:
run_java17 = _make_runner(java17_home, '17')
run_java17 = _make_runner(java17_home, '17')
run_java18 = None
if java18_home is not None:
run_java18 = _make_runner(java18_home, '18')

jc = namedtuple('JavaConfig', 'run_java11 java11_home run_java17 java17_home')
return jc(run_java11, java11_home, run_java17, java17_home)
jc = namedtuple('JavaConfig', 'run_java17 java17_home run_java18 java18_home')
return jc(run_java17, java17_home, run_java18, java18_home)

version_re = re.compile(r'(\d+\.\d+\.\d+(-ALPHA|-BETA)?)')
revision_re = re.compile(r'rev-([a-f\d]+)')
Expand All @@ -955,8 +955,8 @@ def parse_config():
help='GIT revision number that release was built with, defaults to that in URL')
parser.add_argument('--version', metavar='X.Y.Z(-ALPHA|-BETA)?',
help='Version of the release, defaults to that in URL')
parser.add_argument('--test-java17', metavar='java17_home',
help='Path to Java17 home directory, to run tests with if specified')
parser.add_argument('--test-java18', metavar='java18_home',
help='Path to Java home directory, to run tests with if specified')
parser.add_argument('--download-only', action='store_true', default=False,
help='Only perform download and sha hash check steps')
parser.add_argument('url', help='Url pointing to release to test')
Expand All @@ -983,7 +983,7 @@ def parse_config():
if c.local_keys is not None and not os.path.exists(c.local_keys):
parser.error('Local KEYS file "%s" not found' % c.local_keys)

c.java = make_java_config(parser, c.test_java17)
c.java = make_java_config(parser, c.test_java18)

if c.tmp_dir:
c.tmp_dir = os.path.abspath(c.tmp_dir)
Expand Down
32 changes: 32 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,36 @@ Other
* LUCENE-10253: The @BadApple annotation has been removed from the test
framework. (Adrien Grand)

======================= Lucene 9.2.0 =======================

API Changes
---------------------
(No changes)

New Features
---------------------
(No changes)

Improvements
---------------------
(No changes)

Optimizations
---------------------
* LUCENE-10452: Hunspell: call checkCanceled less frequently to reduce the overhead. (Peter Gromov)

* LUCENE-10451: Hunspell: don't perform potentially expensive spellchecking after timeout. (Peter Gromov)

* LUCENE-10418: More `Query#rewrite` optimizations for the non-scoring case.
(Adrien Grand)

Bug Fixes
---------------------
(No changes)

Other
---------------------
(No changes)

======================= Lucene 9.1.0 =======================

API Changes
Expand Down Expand Up @@ -315,6 +345,8 @@ Bug Fixes
infinite loops in their parent BooleanQuery.
(Ankit Jain, Daniel Doubrovkine, Adrien Grand)

* LUCENE-10469: Fix ScoreMode propagation by ConstantScoreQuery. (Adrien Grand)

Other
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class GeneratingSuggester {
this.speller = speller;
}

List<String> suggest(String word, WordCase originalCase, Set<String> prevSuggestions) {
List<String> suggest(String word, WordCase originalCase, Set<Suggestion> prevSuggestions) {
List<Weighted<Root<String>>> roots = findSimilarDictionaryEntries(word, originalCase);
List<Weighted<String>> expanded = expandRoots(word, roots);
TreeSet<Weighted<String>> bySimilarity = rankBySimilarity(word, expanded);
Expand All @@ -77,8 +77,6 @@ char transformChar(char c) {
Math.max(1, word.length() - 4),
word.length() + 4,
(rootChars, forms) -> {
speller.checkCanceled.run();

assert rootChars.length > 0;
if (Math.abs(rootChars.length - word.length()) > MAX_ROOT_LENGTH_DIFF) {
assert rootChars.length < word.length(); // processAllWords takes care of longer keys
Expand All @@ -105,6 +103,8 @@ char transformChar(char c) {
return;
}

speller.checkCanceled.run();

String root = rootChars.toString();
do {
roots.add(new Weighted<>(new Root<>(root, forms.ints[forms.offset + suitable]), sc));
Expand Down Expand Up @@ -331,7 +331,7 @@ private TreeSet<Weighted<String>> rankBySimilarity(String word, List<Weighted<St
}

private List<String> getMostRelevantSuggestions(
TreeSet<Weighted<String>> bySimilarity, Set<String> prevSuggestions) {
TreeSet<Weighted<String>> bySimilarity, Set<Suggestion> prevSuggestions) {
List<String> result = new ArrayList<>();
boolean hasExcellent = false;
for (Weighted<String> weighted : bySimilarity) {
Expand All @@ -347,7 +347,7 @@ private List<String> getMostRelevantSuggestions(
break;
}

if (prevSuggestions.stream().noneMatch(weighted.word::contains)
if (prevSuggestions.stream().noneMatch(s -> weighted.word.contains(s.raw))
&& result.stream().noneMatch(weighted.word::contains)
&& speller.checkWord(weighted.word)) {
result.add(weighted.word);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,20 @@
package org.apache.lucene.analysis.hunspell;

import static org.apache.lucene.analysis.hunspell.Dictionary.FLAG_UNSET;
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.*;
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.NO_TIMEOUT;
import static org.apache.lucene.analysis.hunspell.TimeoutPolicy.RETURN_PARTIAL_RESULT;
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_BEGIN;
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_END;
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_MIDDLE;
import static org.apache.lucene.analysis.hunspell.WordContext.COMPOUND_RULE_END;
import static org.apache.lucene.analysis.hunspell.WordContext.SIMPLE_WORD;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -399,9 +400,10 @@ private boolean checkCompoundRules(
char[] wordChars, int offset, int length, List<IntsRef> words) {
if (words.size() >= 100) return false;

checkCanceled.run();

int limit = length - dictionary.compoundMin + 1;
for (int breakPos = dictionary.compoundMin; breakPos < limit; breakPos++) {
checkCanceled.run();
IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos);
if (forms != null) {
words.add(forms);
Expand Down Expand Up @@ -543,25 +545,25 @@ public List<String> suggest(String word, long timeLimitMs) throws SuggestionTime
}
}

LinkedHashSet<String> suggestions = new LinkedHashSet<>();
LinkedHashSet<Suggestion> suggestions = new LinkedHashSet<>();
Runnable checkCanceled =
policy == NO_TIMEOUT
? this.checkCanceled
: checkTimeLimit(word, wordCase, suggestions, timeLimitMs);
policy == NO_TIMEOUT ? this.checkCanceled : checkTimeLimit(word, suggestions, timeLimitMs);
try {
doSuggest(word, wordCase, suggestions, checkCanceled);
} catch (SuggestionTimeoutException e) {
if (policy == RETURN_PARTIAL_RESULT) {
return postprocess(word, wordCase, suggestions);
if (policy != RETURN_PARTIAL_RESULT) {
throw e;
}
throw e;
}

return postprocess(word, wordCase, suggestions);
return postprocess(suggestions);
}

private void doSuggest(
String word, WordCase wordCase, LinkedHashSet<String> suggestions, Runnable checkCanceled) {
String word,
WordCase wordCase,
LinkedHashSet<Suggestion> suggestions,
Runnable checkCanceled) {
Hunspell suggestionSpeller =
new Hunspell(dictionary, policy, checkCanceled) {
@Override
Expand All @@ -570,22 +572,26 @@ boolean acceptsStem(int formID) {
&& !dictionary.hasFlag(formID, dictionary.subStandard);
}
};
ModifyingSuggester modifier = new ModifyingSuggester(suggestionSpeller, suggestions);
boolean hasGoodSuggestions = modifier.suggest(word, wordCase);
boolean hasGoodSuggestions =
new ModifyingSuggester(suggestionSpeller, suggestions, word, wordCase).suggest();

if (!hasGoodSuggestions && dictionary.maxNGramSuggestions > 0) {
suggestions.addAll(
List<String> generated =
new GeneratingSuggester(suggestionSpeller)
.suggest(dictionary.toLowerCase(word), wordCase, suggestions));
.suggest(dictionary.toLowerCase(word), wordCase, suggestions);
for (String raw : generated) {
suggestions.add(new Suggestion(raw, word, wordCase, suggestionSpeller));
}
}

if (word.contains("-") && suggestions.stream().noneMatch(s -> s.contains("-"))) {
suggestions.addAll(modifyChunksBetweenDashes(word));
if (word.contains("-") && suggestions.stream().noneMatch(s -> s.raw.contains("-"))) {
for (String raw : modifyChunksBetweenDashes(word)) {
suggestions.add(new Suggestion(raw, word, wordCase, suggestionSpeller));
}
}
}

private Runnable checkTimeLimit(
String word, WordCase wordCase, Set<String> suggestions, long timeLimitMs) {
private Runnable checkTimeLimit(String word, Set<Suggestion> suggestions, long timeLimitMs) {
return new Runnable() {
final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeLimitMs);
int invocationCounter = 100;
Expand All @@ -603,38 +609,15 @@ public void run() {

private void stop() {
List<String> partialResult =
policy == RETURN_PARTIAL_RESULT ? null : postprocess(word, wordCase, suggestions);
policy == RETURN_PARTIAL_RESULT ? null : postprocess(suggestions);
String message = "Time limit of " + timeLimitMs + "ms exceeded for " + word;
throw new SuggestionTimeoutException(message, partialResult);
}
};
}

private List<String> postprocess(String word, WordCase wordCase, Collection<String> suggestions) {
Set<String> result = new LinkedHashSet<>();
for (String candidate : suggestions) {
result.add(adjustSuggestionCase(candidate, wordCase, word));
if (wordCase == WordCase.UPPER && dictionary.checkSharpS && candidate.contains("ß")) {
result.add(candidate);
}
}
return result.stream().map(this::cleanOutput).collect(Collectors.toList());
}

private String adjustSuggestionCase(String candidate, WordCase originalCase, String original) {
if (originalCase == WordCase.UPPER) {
String upper = candidate.toUpperCase(Locale.ROOT);
if (upper.contains(" ") || spell(upper)) {
return upper;
}
}
if (Character.isUpperCase(original.charAt(0))) {
String title = Character.toUpperCase(candidate.charAt(0)) + candidate.substring(1);
if (title.contains(" ") || spell(title)) {
return title;
}
}
return candidate;
private List<String> postprocess(Collection<Suggestion> suggestions) {
return suggestions.stream().flatMap(s -> Arrays.stream(s.result)).distinct().toList();
}

private List<String> modifyChunksBetweenDashes(String word) {
Expand Down Expand Up @@ -662,12 +645,4 @@ private List<String> modifyChunksBetweenDashes(String word) {
}
return result;
}

private String cleanOutput(String s) {
if (dictionary.oconv == null) return s;

StringBuilder sb = new StringBuilder(s);
dictionary.oconv.applyMappings(sb);
return sb.toString();
}
}
Loading

0 comments on commit 0c1d549

Please sign in to comment.