Make Persian stemming optional in PersianAnalyzer.

Adds a useStemming flag: PersianStemFilter is appended only when the flag is true.
The stopwords-only constructor passes false; PersianAnalyzer(boolean) uses the default
stop set. The (stopwords, stemExclusionSet) constructor is replaced by
(stopwords, useStemming, stemExclusionSet).

Review changes relative to the submitted patch:
  * testStemming closes both analyzers via try-with-resources, matching testRandomStrings,
    which calls a.close().
  * Javadoc typo "given stop word" corrected to "given stop words" and re-wrapped to stay
    within 100 columns.
  * Line structure restored (the patch had been collapsed onto three lines); indentation is
    reconstructed assuming the files' 2-space style, so verify with `git apply --check`.
    The post-image blob ids on the index lines are those of the submitted patch.

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index afc41233906e..ea4f127cdaf0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -88,6 +88,7 @@ private static class DefaultSetHolder {
   }
 
   private final CharArraySet stemExclusionSet;
+  private final boolean useStemming;
 
   /** Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */
   public PersianAnalyzer() {
@@ -95,23 +96,35 @@ public PersianAnalyzer() {
   }
 
   /**
-   * Builds an analyzer with the given stop words
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}
+   *
+   * @param useStemming whether or not to enable stemming
+   */
+  public PersianAnalyzer(boolean useStemming) {
+    this(DefaultSetHolder.DEFAULT_STOP_SET, useStemming, CharArraySet.EMPTY_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words and no stemming
    *
    * @param stopwords a stopword set
    */
   public PersianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
+    this(stopwords, false, CharArraySet.EMPTY_SET);
   }
 
   /**
-   * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is provided
+   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is provided
    * this analyzer will add a {@link SetKeywordMarkerFilter} before {@link PersianStemFilter}.
    *
    * @param stopwords a stopword set
+   * @param useStemming whether or not to enable stemming
    * @param stemExclusionSet a set of terms not to be stemmed
    */
-  public PersianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
+  public PersianAnalyzer(
+      CharArraySet stopwords, boolean useStemming, CharArraySet stemExclusionSet) {
     super(stopwords);
+    this.useStemming = useStemming;
     this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
@@ -140,7 +153,10 @@ protected TokenStreamComponents createComponents(String fieldName) {
     if (!stemExclusionSet.isEmpty()) {
       result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     }
-    return new TokenStreamComponents(source, new PersianStemFilter(result));
+    if (useStemming) {
+      result = new PersianStemFilter(result);
+    }
+    return new TokenStreamComponents(source, result);
   }
 
   @Override
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
index e6498d58fb4c..c6ab97eb9091 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
@@ -227,4 +227,13 @@ public void testRandomStrings() throws Exception {
     checkRandomData(random(), a, 200 * RANDOM_MULTIPLIER);
     a.close();
   }
+
+  public void testStemming() throws Exception {
+    try (PersianAnalyzer a = new PersianAnalyzer()) {
+      checkOneTerm(a, "دوستان", "دوستان");
+    }
+    try (PersianAnalyzer a = new PersianAnalyzer(true)) {
+      checkOneTerm(a, "دوستان", "دوست");
+    }
+  }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
index 983dd0773473..f5ef185bc742 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
@@ -32,7 +32,7 @@ public class TestPersianStemFilter extends BaseTokenStreamTestCase {
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    a = new PersianAnalyzer();
+    a = new PersianAnalyzer(true);
   }
 
   @Override