From a454388b80e2a92640b79792f2238acd0e7872b9 Mon Sep 17 00:00:00 2001 From: Peter Gromov Date: Fri, 19 May 2023 21:36:05 +0200 Subject: [PATCH] hunspell (minor): reduce allocations when processing compound rules (#12316) --- .../lucene/analysis/hunspell/Dictionary.java | 8 ++++---- .../lucene/analysis/hunspell/Hunspell.java | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 2249e70249ec..b5d13271c3f2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -155,7 +155,7 @@ public class Dictionary { boolean checkCompoundCase, checkCompoundDup, checkCompoundRep; boolean checkCompoundTriple, simplifiedTriple; int compoundMin = 3, compoundMax = Integer.MAX_VALUE; - List compoundRules; // nullable + CompoundRule[] compoundRules; // nullable List checkCompoundPatterns = new ArrayList<>(); // ignored characters (dictionary, affix, inputs) @@ -601,11 +601,11 @@ private String[] splitBySpace(LineNumberReader reader, String line, int minParts return parts; } - private List parseCompoundRules(LineNumberReader reader, int num) + private CompoundRule[] parseCompoundRules(LineNumberReader reader, int num) throws IOException, ParseException { - List compoundRules = new ArrayList<>(); + CompoundRule[] compoundRules = new CompoundRule[num]; for (int i = 0; i < num; i++) { - compoundRules.add(new CompoundRule(singleArgument(reader, reader.readLine()), this)); + compoundRules[i] = new CompoundRule(singleArgument(reader, reader.readLine()), this); } return compoundRules; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java index 1e2a1add13cd..3b58e0f4f980 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Hunspell.java @@ -450,7 +450,7 @@ private boolean checkCompoundRules( if (forms != null) { words.add(forms); - if (dictionary.compoundRules.stream().anyMatch(r -> r.mayMatch(words))) { + if (mayHaveCompoundRule(words)) { if (checkLastCompoundPart(wordChars, offset + breakPos, length - breakPos, words)) { return true; } @@ -467,6 +467,15 @@ private boolean checkCompoundRules( return false; } + private boolean mayHaveCompoundRule(List words) { + for (CompoundRule rule : dictionary.compoundRules) { + if (rule.mayMatch(words)) { + return true; + } + } + return false; + } + private boolean checkLastCompoundPart( char[] wordChars, int start, int length, List words) { IntsRef ref = new IntsRef(new int[1], 0, 1); @@ -475,7 +484,12 @@ private boolean checkLastCompoundPart( Stemmer.RootProcessor stopOnMatching = (stem, formID, morphDataId, outerPrefix, innerPrefix, outerSuffix, innerSuffix) -> { ref.ints[0] = formID; - return dictionary.compoundRules.stream().noneMatch(r -> r.fullyMatches(words)); + for (CompoundRule r : dictionary.compoundRules) { + if (r.fullyMatches(words)) { + return false; + } + } + return true; }; boolean found = !stemmer.doStem(wordChars, start, length, COMPOUND_RULE_END, stopOnMatching); words.remove(words.size() - 1);