queries) {
* @param term term
* @return new TermQuery instance
*/
- protected Query newTermQuery(Term term) {
- return new TermQuery(term);
+ protected Query newTermQuery(Term term, float boost) {
+ Query q = new TermQuery(term);
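+ // wrap in a BoostQuery only when the token stream attached a non-default boost to this term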
+ if (boost == DEFAULT_BOOST) {
+ return q;
+ }
+ return new BoostQuery(q, boost);
}
+
/**
* Builds a new MultiPhraseQuery instance.
*
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
index 7289ead38eff..927dfd4080f9 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.CannedBinaryTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockSynonymFilter;
@@ -32,6 +33,8 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostAttribute;
+import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -507,4 +510,51 @@ public void testMaxBooleanClause() throws Exception {
expectThrows(IndexSearcher.TooManyClauses.class, () -> qb.analyzeGraphPhrase(ts, "", 0));
}
}
+
+ private static final class MockBoostTokenFilter extends TokenFilter {
+
+ final BoostAttribute boostAtt = addAttribute(BoostAttribute.class);
+ final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ protected MockBoostTokenFilter(TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken() == false) {
+ return false;
+ }
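+ // boost every token that is exactly three characters long; all other tokens keep the default boost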
+ if (termAtt.length() == 3) {
+ boostAtt.setBoost(0.5f);
+ }
+ return true;
+ }
+ }
+
+ public void testTokenStreamBoosts() {
+ Analyzer msa = new MockSynonymAnalyzer();
+ Analyzer a = new AnalyzerWrapper(msa.getReuseStrategy()) {
+ @Override
+ protected Analyzer getWrappedAnalyzer(String fieldName) {
+ return msa;
+ }
+ @Override
+ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+ return new TokenStreamComponents(components.getSource(), new MockBoostTokenFilter(components.getTokenStream()));
+ }
+ };
+
+ QueryBuilder builder = new QueryBuilder(a);
+ Query q = builder.createBooleanQuery("field", "hot dogs");
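+ // "hot" and the injected synonym "dog" are three characters long and therefore boosted to 0.5;
+ // "dogs" itself stays unboosted inside the SynonymQuery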
+ Query expected = new BooleanQuery.Builder()
+ .add(new BoostQuery(new TermQuery(new Term("field", "hot")), 0.5f), BooleanClause.Occur.SHOULD)
+ .add(new SynonymQuery.Builder("field")
+ .addTerm(new Term("field", "dogs"))
+ .addTerm(new Term("field", "dog"), 0.5f)
+ .build(), BooleanClause.Occur.SHOULD)
+ .build();
+
+ assertEquals(expected, q);
+ }
}
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
index 9a4043d1d8a3..d552aef9fe75 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
@@ -147,7 +147,7 @@ public Query parse(String query) throws ParseException {
// to throw a runtime exception here if a term for another field is embedded
// in phrase query
@Override
- protected Query newTermQuery(Term term) {
+ protected Query newTermQuery(Term term, float boost) {
if (isPass2ResolvingPhrases) {
try {
checkPhraseClauseIsForSameField(term.field());
@@ -155,7 +155,7 @@ protected Query newTermQuery(Term term) {
throw new RuntimeException("Error parsing complex phrase", pe);
}
}
- return super.newTermQuery(term);
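+ // forward the token boost unchanged to the default QueryBuilder implementation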
+ return super.newTermQuery(term, boost);
}
// Helper method used to report on any clauses that appear in query syntax
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index 469da7f1c16c..a4084d1509de 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -21,6 +21,8 @@
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -601,19 +603,35 @@ protected Query newRegexpQuery(Term regexp) {
}
@Override
- protected Query newSynonymQuery(Term terms[]) {
+ protected Query newGraphSynonymQuery(Iterator<Query> sidePathQueriesIterator) {
+ switch (synonymQueryStyle) {
+ case PICK_BEST: {
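+ // PICK_BEST: wrap the synonym side paths in a DisjunctionMaxQuery so only the best scoring path contributes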
+ List<Query> sidePathSynonymQueries = new LinkedList<>();
+ sidePathQueriesIterator.forEachRemaining(sidePathSynonymQueries::add);
+ return new DisjunctionMaxQuery(sidePathSynonymQueries, 0.0f);
+ }
+ case AS_SAME_TERM:
+ case AS_DISTINCT_TERMS: {
+ return super.newGraphSynonymQuery(sidePathQueriesIterator);
+ }
+ default:
+ throw new AssertionError("unrecognized synonymQueryStyle passed when creating newSynonymQuery");
+ }
+ }
+
+ @Override
+ protected Query newSynonymQuery(TermAndBoost[] terms) {
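+ // each TermAndBoost pairs a term with the boost captured from the token stream's BoostAttribute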
switch (synonymQueryStyle) {
case PICK_BEST:
List<Query> currPosnClauses = new ArrayList<>(terms.length);
- for (Term term : terms) {
- currPosnClauses.add(newTermQuery(term));
+ for (TermAndBoost term : terms) {
+ currPosnClauses.add(newTermQuery(term.term, term.boost));
}
DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f);
return dm;
case AS_DISTINCT_TERMS:
BooleanQuery.Builder builder = new BooleanQuery.Builder();
- for (Term term : terms) {
- builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD);
+ for (TermAndBoost term : terms) {
+ builder.add(newTermQuery(term.term, term.boost), BooleanClause.Occur.SHOULD);
}
return builder.build();
case AS_SAME_TERM:
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml
index 1368e6b04fc4..d4cb89e85f87 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml
@@ -227,6 +227,41 @@
+    <!-- fieldTypes backing the weighted-synonym tests: query-time analyzers that chain
+         solr.SynonymGraphFilterFactory (synonyms="synonyms.txt") with
+         solr.DelimitedBoostTokenFilterFactory, covering the pick_best, as_distinct_terms
+         and as_same_term synonymQueryStyle variants -->
@@ -656,6 +691,9 @@
+    <!-- fields t_pick_best_boosted_foo, t_as_distinct_boosted_foo and t_as_same_term_boosted_foo
+         backed by the boosted-synonym fieldTypes above -->
diff --git a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt
index 68dbf0bf62b1..d7feb34ee647 100644
--- a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt
+++ b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt
@@ -37,4 +37,18 @@ crow blackbird, grackle
tabby => tabby, cat, feline, animal
persian => persian, cat, feline, animal
-jeans, denim pants
\ No newline at end of file
+jeans, denim pants
+
+# Boosted Synonyms
+tiger, tigre|0.9
+lynx => lince|0.8, lynx_canadensis|0.9
+
+leopard, big cat|0.8, bagheera|0.9, panthera pardus|0.85
+lion => panthera leo|0.9, simba leo|0.8, kimba|0.75
+
+panthera pardus, leopard|0.6
+panthera tigris => tiger|0.99
+
+snow leopard, panthera uncia|0.9, big cat|0.8, white_leopard|0.6
+panthera onca => jaguar|0.95, big cat|0.85, black panther|0.65
+panthera blytheae, oldest|0.5 ancient|0.9 panthera
\ No newline at end of file
diff --git a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymGraphFilterFactory.java b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymGraphFilterFactory.java
index fc1e735b355a..66e9efe5fce4 100644
--- a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymGraphFilterFactory.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymGraphFilterFactory.java
@@ -300,4 +300,80 @@ public void testCanHandleDecodingAndEncodingForSynonyms() throws Exception {
assertJDelete(endpoint+"/fröhlich",
"/error/code==404");
}
+
+ /**
+ * Can we add single term synonyms with a weight?
+ */
+ @Test
+ public void testManagedSynonyms_singleTermWithWeight_shouldHandleSynonym() throws Exception {
+ String endpoint = "/schema/analysis/synonyms/englishgraph";
+
+ assertJQ(endpoint,
+ "/synonymMappings/initArgs/ignoreCase==false",
+ "/synonymMappings/managedMap=={}");
+
+ // does not exist
+ assertJQ(endpoint+"/tiger",
+ "/error/code==404");
+
+ Map<String, List<String>> syns = new HashMap<>();
+
+ // now put a synonym
+ syns.put("tiger", Arrays.asList("tiger|1.0"));
+ assertJPut(endpoint,
+ toJSONString(syns),
+ "/responseHeader/status==0");
+
+ // and check if it exists
+ assertJQ(endpoint,
+ "/synonymMappings/managedMap/tiger==['tiger|1.0']");
+
+ // verify delete works
+ assertJDelete(endpoint+"/tiger",
+ "/responseHeader/status==0");
+
+
+ // was it really deleted?
+ assertJDelete(endpoint+"/tiger",
+ "/error/code==404");
+ }
+
+ /**
+ * Can we add multi term synonyms with weights?
+ */
+ @Test
+ public void testManagedSynonyms_multiTermWithWeight_shouldHandleSynonym() throws Exception {
+ String endpoint = "/schema/analysis/synonyms/englishgraph";
+
+ assertJQ(endpoint,
+ "/synonymMappings/initArgs/ignoreCase==false",
+ "/synonymMappings/managedMap=={}");
+
+ // does not exist
+ assertJQ(endpoint+"/tiger",
+ "/error/code==404");
+
+ Map<String, List<String>> syns = new HashMap<>();
+
+ // now put a synonym
+ List<String> tigerSynonyms = Arrays.asList("tiger|1.0", "panthera tigris|0.9", "Shere Kan|0.8");
+ syns.put("tiger", tigerSynonyms);
+ String jsonTigerSynonyms = toJSONString(syns);
+ assertJPut(endpoint,
+ jsonTigerSynonyms,
+ "/responseHeader/status==0");
+
+ // and check if it exists
+ assertJQ(endpoint,
+ "/synonymMappings/managedMap/tiger==[\"Shere Kan|0.8\",\"panthera tigris|0.9\",\"tiger|1.0\"]");
+
+ // verify delete works
+ assertJDelete(endpoint+"/tiger",
+ "/responseHeader/status==0");
+
+
+ // was it really deleted?
+ assertJDelete(endpoint+"/tiger",
+ "/error/code==404");
+ }
}
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
index 9fb2598e2eae..69d12bb7b92a 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
@@ -1221,8 +1221,225 @@ public void testSynonymQueryStyle() throws Exception {
assertEquals("(t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)", q.toString());
q = QParser.getParser("jeans", req(params("df", "t_pick_best_foo", "sow", "false"))).getQuery();
- assertEquals("(t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)", q.toString());
+ assertEquals("(t_pick_best_foo:\"denim pant\" | t_pick_best_foo:jean)", q.toString());
+ }
+
+ public void testSynonymsBoost_singleTermQuerySingleTermSynonyms_shouldParseBoostedQuery() throws Exception {
+ //tiger, tigre|0.9
+ Query q = QParser.getParser("tiger", req(params("df", "t_pick_best_boosted_foo"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:tigre)^0.9 | t_pick_best_boosted_foo:tiger)", q.toString());
+
+ q = QParser.getParser("tiger", req(params("df", "t_as_distinct_boosted_foo"))).getQuery();
+ assertEquals("(t_as_distinct_boosted_foo:tigre)^0.9 t_as_distinct_boosted_foo:tiger", q.toString());
+
+ q = QParser.getParser("tiger", req(params("df", "t_as_same_term_boosted_foo"))).getQuery();
+ assertEquals("Synonym(t_as_same_term_boosted_foo:tiger t_as_same_term_boosted_foo:tigre^0.9)", q.toString());
+
+ //lynx => lince|0.8, lynx_canadensis|0.9
+ q = QParser.getParser("lynx", req(params("df", "t_pick_best_boosted_foo"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:lince)^0.8 | (t_pick_best_boosted_foo:lynx_canadensis)^0.9)", q.toString());
+
+ q = QParser.getParser("lynx", req(params("df", "t_as_distinct_boosted_foo"))).getQuery();
+ assertEquals("(t_as_distinct_boosted_foo:lince)^0.8 (t_as_distinct_boosted_foo:lynx_canadensis)^0.9", q.toString());
+
+ q = QParser.getParser("lynx", req(params("df", "t_as_same_term_boosted_foo"))).getQuery();
+ assertEquals("Synonym(t_as_same_term_boosted_foo:lince^0.8 t_as_same_term_boosted_foo:lynx_canadensis^0.9)", q.toString());
+ }
+
+ public void testSynonymsBoost_singleTermQueryMultiTermSynonyms_shouldParseBoostedQuery() throws Exception {
+ //leopard, big cat|0.8, bagheera|0.9, panthera pardus|0.85
+ Query q = QParser.getParser("leopard", req(params("df", "t_pick_best_boosted_foo"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:\"big cat\")^0.8 | (t_pick_best_boosted_foo:bagheera)^0.9 | (t_pick_best_boosted_foo:\"panthera pardus\")^0.85 | t_pick_best_boosted_foo:leopard)", q.toString());
+
+ q = QParser.getParser("leopard", req(params("df", "t_as_distinct_boosted_foo"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:\"big cat\")^0.8 (t_as_distinct_boosted_foo:bagheera)^0.9 (t_as_distinct_boosted_foo:\"panthera pardus\")^0.85 t_as_distinct_boosted_foo:leopard)", q.toString());
+
+ q = QParser.getParser("leopard", req(params("df", "t_as_same_term_boosted_foo"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:\"big cat\")^0.8 (t_as_same_term_boosted_foo:bagheera)^0.9 (t_as_same_term_boosted_foo:\"panthera pardus\")^0.85 t_as_same_term_boosted_foo:leopard)", q.toString());
+
+ //lion => panthera leo|0.9, simba leo|0.8, kimba|0.75
+ q = QParser.getParser("lion", req(params("df", "t_pick_best_boosted_foo"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:\"panthera leo\")^0.9 | (t_pick_best_boosted_foo:\"simba leo\")^0.8 | (t_pick_best_boosted_foo:kimba)^0.75)", q.toString());
+
+ q = QParser.getParser("lion", req(params("df", "t_as_distinct_boosted_foo"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:\"panthera leo\")^0.9 (t_as_distinct_boosted_foo:\"simba leo\")^0.8 (t_as_distinct_boosted_foo:kimba)^0.75)", q.toString());
+
+ q = QParser.getParser("lion", req(params("df", "t_as_same_term_boosted_foo"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:\"panthera leo\")^0.9 (t_as_same_term_boosted_foo:\"simba leo\")^0.8 (t_as_same_term_boosted_foo:kimba)^0.75)", q.toString());
+ }
+
+ public void testSynonymsBoost_multiTermQuerySingleTermSynonyms_shouldParseBoostedQuery() throws Exception {
+ //tiger, tigre|0.9
+ //lynx => lince|0.8, lynx_canadensis|0.9
+ Query q = QParser.getParser("tiger lynx", req(params("df", "t_pick_best_boosted_foo"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:tigre)^0.9 | t_pick_best_boosted_foo:tiger)" +
+ " ((t_pick_best_boosted_foo:lince)^0.8 | (t_pick_best_boosted_foo:lynx_canadensis)^0.9)", q.toString());
+
+ q = QParser.getParser("tiger lynx", req(params("df", "t_as_distinct_boosted_foo"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:tigre)^0.9 t_as_distinct_boosted_foo:tiger)" +
+ " ((t_as_distinct_boosted_foo:lince)^0.8 (t_as_distinct_boosted_foo:lynx_canadensis)^0.9)", q.toString());
+
+ q = QParser.getParser("tiger lynx", req(params("df", "t_as_same_term_boosted_foo"))).getQuery();
+ assertEquals("Synonym(t_as_same_term_boosted_foo:tiger t_as_same_term_boosted_foo:tigre^0.9)" +
+ " Synonym(t_as_same_term_boosted_foo:lince^0.8 t_as_same_term_boosted_foo:lynx_canadensis^0.9)", q.toString());
+ }
+
+ public void testSynonymsBoost_multiTermQueryMultiTermSynonyms_shouldParseBoostedQuery() throws Exception {
+ //leopard, big cat|0.8, bagheera|0.9, panthera pardus|0.85
+ //lion => panthera leo|0.9, simba leo|0.8, kimba|0.75
+ Query q = QParser.getParser("leopard lion", req(params("df", "t_pick_best_boosted_foo"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:\"big cat\")^0.8 | (t_pick_best_boosted_foo:bagheera)^0.9 | (t_pick_best_boosted_foo:\"panthera pardus\")^0.85 | t_pick_best_boosted_foo:leopard)" +
+ " ((t_pick_best_boosted_foo:\"panthera leo\")^0.9 | (t_pick_best_boosted_foo:\"simba leo\")^0.8 | (t_pick_best_boosted_foo:kimba)^0.75)", q.toString());
+
+ q = QParser.getParser("leopard lion", req(params("df", "t_as_distinct_boosted_foo"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:\"big cat\")^0.8 (t_as_distinct_boosted_foo:bagheera)^0.9 (t_as_distinct_boosted_foo:\"panthera pardus\")^0.85 t_as_distinct_boosted_foo:leopard)" +
+ " ((t_as_distinct_boosted_foo:\"panthera leo\")^0.9 (t_as_distinct_boosted_foo:\"simba leo\")^0.8 (t_as_distinct_boosted_foo:kimba)^0.75)", q.toString());
+
+ q = QParser.getParser("leopard lion", req(params("df", "t_as_same_term_boosted_foo"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:\"big cat\")^0.8 (t_as_same_term_boosted_foo:bagheera)^0.9 (t_as_same_term_boosted_foo:\"panthera pardus\")^0.85 t_as_same_term_boosted_foo:leopard)" +
+ " ((t_as_same_term_boosted_foo:\"panthera leo\")^0.9 (t_as_same_term_boosted_foo:\"simba leo\")^0.8 (t_as_same_term_boosted_foo:kimba)^0.75)", q.toString());
+
+ }
+
+ public void testSynonymsBoost_singleConceptQuerySingleTermSynonym_shouldParseBoostedQuery() throws Exception {
+ //panthera pardus, leopard|0.6
+ Query q = QParser.getParser("panthera pardus story",req(params("df", "t_pick_best_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:leopard)^0.6 | t_pick_best_boosted_foo:\"panthera pardus\") t_pick_best_boosted_foo:story", q.toString());
+
+ q = QParser.getParser("panthera pardus story", req(params("df", "t_as_distinct_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:leopard)^0.6 t_as_distinct_boosted_foo:\"panthera pardus\") t_as_distinct_boosted_foo:story", q.toString());
+
+ q = QParser.getParser("panthera pardus story", req(params("df", "t_as_same_term_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:leopard)^0.6 t_as_same_term_boosted_foo:\"panthera pardus\") t_as_same_term_boosted_foo:story", q.toString());
+
+ //panthera tigris => tiger|0.99
+ q = QParser.getParser("panthera tigris story", req(params("df", "t_pick_best_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("(t_pick_best_boosted_foo:tiger)^0.99 t_pick_best_boosted_foo:story", q.toString());
+
+ q = QParser.getParser("panthera tigris story", req(params("df", "t_as_distinct_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("(t_as_distinct_boosted_foo:tiger)^0.99 t_as_distinct_boosted_foo:story", q.toString());
+
+ q = QParser.getParser("panthera tigris story", req(params("df", "t_as_same_term_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("(t_as_same_term_boosted_foo:tiger)^0.99 t_as_same_term_boosted_foo:story", q.toString());
+ }
+
+ public void testSynonymsBoost_singleConceptQueryMultiTermSynonymWithMultipleBoost_shouldParseMultiplicativeBoostedQuery() throws Exception {
+ //panthera blytheae, oldest|0.5 ancient|0.9 panthera
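+ //boosts inside a multi-token synonym multiply: 0.5 * 0.9 = 0.45 on the expanded phrase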
+ Query q = QParser.getParser("panthera blytheae",req(params("df", "t_pick_best_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:\"oldest ancient panthera\")^0.45 | t_pick_best_boosted_foo:\"panthera blytheae\")", q.toString());
+
+ q = QParser.getParser("panthera blytheae", req(params("df", "t_as_distinct_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:\"oldest ancient panthera\")^0.45 t_as_distinct_boosted_foo:\"panthera blytheae\")", q.toString());
+
+ q = QParser.getParser("panthera blytheae", req(params("df", "t_as_same_term_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:\"oldest ancient panthera\")^0.45 t_as_same_term_boosted_foo:\"panthera blytheae\")", q.toString());
+ }
+
+ public void testSynonymsBoost_singleConceptQueryMultiTermSynonyms_shouldParseBoostedQuery() throws Exception {
+ //snow leopard, panthera uncia|0.9, big cat|0.8, white_leopard|0.6
+ Query q = QParser.getParser("snow leopard",req(params("df", "t_pick_best_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:\"panthera uncia\")^0.9 | (t_pick_best_boosted_foo:\"big cat\")^0.8 | (t_pick_best_boosted_foo:white_leopard)^0.6 | t_pick_best_boosted_foo:\"snow leopard\")", q.toString());
+
+ q = QParser.getParser("snow leopard", req(params("df", "t_as_distinct_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:\"panthera uncia\")^0.9 (t_as_distinct_boosted_foo:\"big cat\")^0.8 (t_as_distinct_boosted_foo:white_leopard)^0.6 t_as_distinct_boosted_foo:\"snow leopard\")", q.toString());
+
+ q = QParser.getParser("snow leopard", req(params("df", "t_as_same_term_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:\"panthera uncia\")^0.9 (t_as_same_term_boosted_foo:\"big cat\")^0.8 (t_as_same_term_boosted_foo:white_leopard)^0.6 t_as_same_term_boosted_foo:\"snow leopard\")", q.toString());
+
+ //panthera onca => jaguar|0.95, big cat|0.85, black panther|0.65
+ q = QParser.getParser("panthera onca", req(params("df", "t_pick_best_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:jaguar)^0.95 | (t_pick_best_boosted_foo:\"big cat\")^0.85 | (t_pick_best_boosted_foo:\"black panther\")^0.65)", q.toString());
+
+ q = QParser.getParser("panthera onca", req(params("df", "t_as_distinct_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:jaguar)^0.95 (t_as_distinct_boosted_foo:\"big cat\")^0.85 (t_as_distinct_boosted_foo:\"black panther\")^0.65)", q.toString());
+
+ q = QParser.getParser("panthera onca", req(params("df", "t_as_same_term_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:jaguar)^0.95 (t_as_same_term_boosted_foo:\"big cat\")^0.85 (t_as_same_term_boosted_foo:\"black panther\")^0.65)", q.toString());
+
+ }
+
+ public void testSynonymsBoost_multiConceptQuerySingleTermSynonym_shouldParseBoostedQuery() throws Exception {
+ //panthera pardus, leopard|0.6
+ //tiger, tigre|0.9
+ Query q = QParser.getParser("panthera pardus tiger",req(params("df", "t_pick_best_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:leopard)^0.6 | t_pick_best_boosted_foo:\"panthera pardus\") ((t_pick_best_boosted_foo:tigre)^0.9 | t_pick_best_boosted_foo:tiger)", q.toString());
+
+ q = QParser.getParser("panthera pardus tiger", req(params("df", "t_as_distinct_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:leopard)^0.6 t_as_distinct_boosted_foo:\"panthera pardus\") ((t_as_distinct_boosted_foo:tigre)^0.9 t_as_distinct_boosted_foo:tiger)", q.toString());
+
+ q = QParser.getParser("panthera pardus tiger", req(params("df", "t_as_same_term_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:leopard)^0.6 t_as_same_term_boosted_foo:\"panthera pardus\") Synonym(t_as_same_term_boosted_foo:tiger t_as_same_term_boosted_foo:tigre^0.9)", q.toString());
+ }
+
+ public void testSynonymsBoost_multiConceptsQueryMultiTermSynonyms_shouldParseBoostedQuery() throws Exception {
+ //snow leopard, panthera uncia|0.9, big cat|0.8, white_leopard|0.6
+ //panthera onca => jaguar|0.95, big cat|0.85, black panther|0.65
+ Query q = QParser.getParser("snow leopard panthera onca",req(params("df", "t_pick_best_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:\"panthera uncia\")^0.9 | (t_pick_best_boosted_foo:\"big cat\")^0.8 | (t_pick_best_boosted_foo:white_leopard)^0.6 | t_pick_best_boosted_foo:\"snow leopard\")" +
+ " ((t_pick_best_boosted_foo:jaguar)^0.95 | (t_pick_best_boosted_foo:\"big cat\")^0.85 | (t_pick_best_boosted_foo:\"black panther\")^0.65)", q.toString());
+
+ q = QParser.getParser("snow leopard panthera onca", req(params("df", "t_as_distinct_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:\"panthera uncia\")^0.9 (t_as_distinct_boosted_foo:\"big cat\")^0.8 (t_as_distinct_boosted_foo:white_leopard)^0.6 t_as_distinct_boosted_foo:\"snow leopard\")" +
+ " ((t_as_distinct_boosted_foo:jaguar)^0.95 (t_as_distinct_boosted_foo:\"big cat\")^0.85 (t_as_distinct_boosted_foo:\"black panther\")^0.65)", q.toString());
+
+ q = QParser.getParser("snow leopard panthera onca", req(params("df", "t_as_same_term_boosted_foo","sow", "false"))).getQuery();
+ assertEquals("((t_as_same_term_boosted_foo:\"panthera uncia\")^0.9 (t_as_same_term_boosted_foo:\"big cat\")^0.8 (t_as_same_term_boosted_foo:white_leopard)^0.6 t_as_same_term_boosted_foo:\"snow leopard\")" +
+ " ((t_as_same_term_boosted_foo:jaguar)^0.95 (t_as_same_term_boosted_foo:\"big cat\")^0.85 (t_as_same_term_boosted_foo:\"black panther\")^0.65)", q.toString());
+
+ }
+
+ public void testSynonymsBoost_edismaxBoost_shouldParseBoostedPhraseQuery() throws Exception {
+ Query q = QParser.getParser("snow leopard lion","edismax",true, req(params("sow", "false","qf", "t_pick_best_boosted_foo^10"))).getQuery();
+ assertEquals("+(" +
+ "((((t_pick_best_boosted_foo:\"panthera uncia\")^0.9 | (t_pick_best_boosted_foo:\"big cat\")^0.8 | (t_pick_best_boosted_foo:white_leopard)^0.6 | t_pick_best_boosted_foo:\"snow leopard\"))^10.0)" +
+ " ((((t_pick_best_boosted_foo:\"panthera leo\")^0.9 | (t_pick_best_boosted_foo:\"simba leo\")^0.8 | (t_pick_best_boosted_foo:kimba)^0.75))^10.0)" +
+ ")", q.toString());
+
+ q = QParser.getParser("snow leopard lion","edismax",true, req(params("sow", "false","qf", "t_as_distinct_boosted_foo^10"))).getQuery();
+ assertEquals("+(" +
+ "(((t_as_distinct_boosted_foo:\"panthera uncia\")^0.9 (t_as_distinct_boosted_foo:\"big cat\")^0.8 (t_as_distinct_boosted_foo:white_leopard)^0.6 t_as_distinct_boosted_foo:\"snow leopard\")^10.0)" +
+ " (((t_as_distinct_boosted_foo:\"panthera leo\")^0.9 (t_as_distinct_boosted_foo:\"simba leo\")^0.8 (t_as_distinct_boosted_foo:kimba)^0.75)^10.0))", q.toString());
+
+ q = QParser.getParser("snow leopard lion","edismax",true, req(params("sow", "false","qf", "t_as_same_term_boosted_foo^10"))).getQuery();
+ assertEquals("+(" +
+ "(((t_as_same_term_boosted_foo:\"panthera uncia\")^0.9 (t_as_same_term_boosted_foo:\"big cat\")^0.8 (t_as_same_term_boosted_foo:white_leopard)^0.6 t_as_same_term_boosted_foo:\"snow leopard\")^10.0)" +
+ " (((t_as_same_term_boosted_foo:\"panthera leo\")^0.9 (t_as_same_term_boosted_foo:\"simba leo\")^0.8 (t_as_same_term_boosted_foo:kimba)^0.75)^10.0))", q.toString());
+
+ }
+
+ public void testSynonymsBoost_phraseQueryMultiTermSynonymsBoost_shouldParseBoostedSpanQuery() throws Exception {
+ Query q = QParser.getParser("\"snow leopard lion\"", req(params("df", "t_pick_best_boosted_foo", "sow", "false"))).getQuery();
+ assertEquals("spanNear([" +
+ "spanOr([" +
+ "(spanNear([t_pick_best_boosted_foo:panthera, t_pick_best_boosted_foo:uncia], 0, true))^0.9," +
+ " (spanNear([t_pick_best_boosted_foo:big, t_pick_best_boosted_foo:cat], 0, true))^0.8," +
+ " (t_pick_best_boosted_foo:white_leopard)^0.6," +
+ " spanNear([t_pick_best_boosted_foo:snow, t_pick_best_boosted_foo:leopard], 0, true)])," +
+ " spanOr([" +
+ "(spanNear([t_pick_best_boosted_foo:panthera, t_pick_best_boosted_foo:leo], 0, true))^0.9," +
+ " (spanNear([t_pick_best_boosted_foo:simba, t_pick_best_boosted_foo:leo], 0, true))^0.8," +
+ " (t_pick_best_boosted_foo:kimba)^0.75])], 0, true)", q.toString());
+ }
+
+ public void testSynonymsBoost_phraseQueryMultiTermSynonymsMultipleBoost_shouldParseMultiplicativeBoostedSpanQuery() throws Exception {
+ Query q = QParser.getParser("\"panthera blytheae lion\"", req(params("df", "t_pick_best_boosted_foo", "sow", "false"))).getQuery();
+ assertEquals("spanNear([" +
+ "spanOr([" +
+ "(spanNear([t_pick_best_boosted_foo:oldest, t_pick_best_boosted_foo:ancient, t_pick_best_boosted_foo:panthera], 0, true))^0.45," +
+ " spanNear([t_pick_best_boosted_foo:panthera, t_pick_best_boosted_foo:blytheae], 0, true)])," +
+ " spanOr([" +
+ "(spanNear([t_pick_best_boosted_foo:panthera, t_pick_best_boosted_foo:leo], 0, true))^0.9," +
+ " (spanNear([t_pick_best_boosted_foo:simba, t_pick_best_boosted_foo:leo], 0, true))^0.8," +
+ " (t_pick_best_boosted_foo:kimba)^0.75])], 0, true)", q.toString());
+ }
+
+ public void testSynonymsBoost_BoostMissing_shouldAssignDefaultBoost() throws Exception {
+ //leopard, big cat|0.8, bagheera|0.9, panthera pardus|0.85
+ Query q = QParser.getParser("leopard", req(params("df", "t_pick_best_boosted_foo"))).getQuery();
+ assertEquals("((t_pick_best_boosted_foo:\"big cat\")^0.8 | (t_pick_best_boosted_foo:bagheera)^0.9 | (t_pick_best_boosted_foo:\"panthera pardus\")^0.85 | t_pick_best_boosted_foo:leopard)", q.toString());
+ q = QParser.getParser("leopard", req(params("df", "t_as_distinct_boosted_foo"))).getQuery();
+ assertEquals("((t_as_distinct_boosted_foo:\"big cat\")^0.8 (t_as_distinct_boosted_foo:bagheera)^0.9 (t_as_distinct_boosted_foo:\"panthera pardus\")^0.85 t_as_distinct_boosted_foo:leopard)", q.toString());
}
@Test
diff --git a/solr/solr-ref-guide/src/filter-descriptions.adoc b/solr/solr-ref-guide/src/filter-descriptions.adoc
index daa1f85dfe16..f4f6cb7a3008 100644
--- a/solr/solr-ref-guide/src/filter-descriptions.adoc
+++ b/solr/solr-ref-guide/src/filter-descriptions.adoc
@@ -398,6 +398,72 @@ Discard original token (`inject="false"`).
Note that "Kuczewski" has two encodings, which are added at the same position.
+== Delimited Boost Filter
+
+This filter splits each token on a delimiter character and attaches the trailing numeric value to the token as a floating point boost.
+
+*Factory class:* `solr.DelimitedBoostTokenFilterFactory`
+
+*Arguments:*
+
+`delimiter`:: The character used to separate the token and the boost. Defaults to '|'.
+
+*Example:*
+
+[.dynamic-tabs]
+--
+[example.tab-pane#byname-filter-delimitedBoost]
+====
+[.tab-label]*With name*
+[source,xml]
+----
+<analyzer>
+  <tokenizer name="whitespace"/>
+  <filter name="delimitedBoost"/>
+</analyzer>
+----
+====
+[example.tab-pane#byclass-filter-delimitedBoost]
+====
+[.tab-label]*With class name (legacy)*
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+  <filter class="solr.DelimitedBoostTokenFilterFactory"/>
+</analyzer>
+----
+====
+--
+
+*In:* "leopard|0.5 panthera uncia|0.9"
+
+*Tokenizer to Filter:* "leopard|0.5"(1), "panthera"(2), "uncia|0.9"(3)
+
+*Out:* "leopard"(1)[0.5], "panthera"(2), "uncia"(3)[0.9]
+
+The floating point value in square brackets is the boost attached to that token (a token `BoostAttribute`).
+
+*Example:*
+
+Using a different delimiter (`delimiter="/"`).
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer name="whitespace"/>
+  <filter name="delimitedBoost" delimiter="/"/>
+</analyzer>
+----
+
+*In:* "leopard/0.5 panthera uncia/0.9"
+
+*Tokenizer to Filter:* "leopard/0.5"(1), "panthera"(2), "uncia/0.9"(3)
+
+*Out:* "leopard"(1)[0.5], "panthera"(2), "uncia"(3)[0.9]
+
+*N.B.* Make sure the delimiter is compatible with the tokenizer you use; the tokenizer must not split tokens on the delimiter character.
+
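+For reference, here is a minimal Java sketch of what this filter does to a token stream. It is an
+illustration only: it assumes Lucene's `DelimitedBoostTokenFilter` (analysis-common module) with a
+constructor taking the input stream and the delimiter character, and that the boost is exposed through
+the token stream's `BoostAttribute`, which should report the default `1.0` for tokens without a delimiter.
+
+[source,java]
+----
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.boost.DelimitedBoostTokenFilter;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.search.BoostAttribute;
+
+public class DelimitedBoostDemo {
+  public static void main(String[] args) throws Exception {
+    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
+    tokenizer.setReader(new StringReader("leopard|0.5 panthera uncia|0.9"));
+    try (TokenStream ts = new DelimitedBoostTokenFilter(tokenizer, '|')) {
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+      BoostAttribute boostAtt = ts.addAttribute(BoostAttribute.class);
+      ts.reset();
+      while (ts.incrementToken()) {
+        // expected output: leopard 0.5, panthera 1.0, uncia 0.9
+        System.out.println(termAtt + " " + boostAtt.getBoost());
+      }
+      ts.end();
+    }
+  }
+}
+----
+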
== Edge N-Gram Filter
This filter generates edge n-gram tokens of sizes within the given range.
@@ -2292,6 +2358,39 @@ small => tiny,teeny,weeny
*Out:* "the"(1), "large"(2), "large"(3), "couch"(4), "sofa"(4), "divan"(4)
+*Weighted Synonyms:*
+
+By combining the Delimited Boost Filter with the Synonym Graph Filter you can achieve weighted synonyms at query time.
+For more information, see:
+https://sease.io/2020/02/introducing-weighted-synonyms-in-apache-lucene.html
+For the following examples, assume a synonyms file named `boostedSynonyms.txt`:
+
+[source,text]
+----
+leopard, big cat|0.8, bagheera|0.9, panthera pardus|0.85
+lion => panthera leo|0.9, simba|0.8, kimba|0.75
+----
+
+*Example:*
+
+====
+[.tab-label]*With name*
+[source,xml]
+----
+<analyzer type="query">
+  <tokenizer name="standard"/>
+  <filter name="synonymGraph" synonyms="boostedSynonyms.txt"/>
+  <filter name="delimitedBoost"/>
+</analyzer>
+----
+====
+
+*In:* "lion"
+
+*Tokenizer to Filter:* "lion"(1)
+
+*Out:* "panthera"(1), "leo"(2)[0.9], "simba"(1)[0.8], "kimba"(1)[0.75]
+
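+At query time these per-token boosts are carried into the generated queries. With
+`synonymQueryStyle="pick_best"`, for example, a query for `lion` on a field using this analyzer
+would parse to a boosted disjunction along the lines of
+`(field:"panthera leo")^0.9 | (field:simba)^0.8 | (field:kimba)^0.75` (field name illustrative).
+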
== Token Offset Payload Filter
This filter adds the numeric character offsets of the token as a payload value for that token.