-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DerivedFields] DerivedFieldScript and query execution logic (#12746)
First in a series of commits to support derived fields, a form of schema-on-read. This commit adds: 1. DerivedFieldScript factory: This script factory will be used to execute scripts defined against derived fields of any type. 2. DerivedFieldValueFetcher: The value fetcher contains logic to execute script and fetch the value in form of List<Object>. It expects DerivedFieldScript.LeafFactory as an input and sets the contract with consumer to call setNextReader() whenever a segment is switched. 3. DerivedFieldQuery: This query will be used by any of the derived fields. It expects an input query and DerivedFieldValueFetcher. It uses 2-phase iterator approach with approximation iterator set to match all docs. On a match, it creates a lucene MemoryIndex for a given doc, fetches the value of the derived field from _source using DerivedFieldValueFetcher and executes the input query against. --------- Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com> (cherry picked from commit 70711cf) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
48881de
commit b01cf56
Showing
7 changed files
with
417 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
45 changes: 45 additions & 0 deletions
45
server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.mapper; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.opensearch.script.DerivedFieldScript; | ||
import org.opensearch.search.lookup.SourceLookup; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
|
||
/** | ||
* The value fetcher contains logic to execute script and fetch the value in form of list of object. | ||
* It expects DerivedFieldScript.LeafFactory as an input and sets the contract with consumer to call | ||
* {@link #setNextReader(LeafReaderContext)} whenever a segment is switched. | ||
*/ | ||
public final class DerivedFieldValueFetcher implements ValueFetcher { | ||
private DerivedFieldScript derivedFieldScript; | ||
private final DerivedFieldScript.LeafFactory derivedFieldScriptFactory; | ||
|
||
public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScriptFactory) { | ||
this.derivedFieldScriptFactory = derivedFieldScriptFactory; | ||
} | ||
|
||
@Override | ||
public List<Object> fetchValues(SourceLookup lookup) { | ||
derivedFieldScript.setDocument(lookup.docId()); | ||
// TODO: remove List.of() when derivedFieldScript.execute() returns list of objects. | ||
return List.of(derivedFieldScript.execute()); | ||
} | ||
|
||
public void setNextReader(LeafReaderContext context) { | ||
try { | ||
derivedFieldScript = derivedFieldScriptFactory.newInstance(context); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} |
146 changes: 146 additions & 0 deletions
146
server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.query; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.index.IndexableField; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.memory.MemoryIndex; | ||
import org.apache.lucene.search.ConstantScoreScorer; | ||
import org.apache.lucene.search.ConstantScoreWeight; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.QueryVisitor; | ||
import org.apache.lucene.search.ScoreMode; | ||
import org.apache.lucene.search.Scorer; | ||
import org.apache.lucene.search.TwoPhaseIterator; | ||
import org.apache.lucene.search.Weight; | ||
import org.opensearch.index.mapper.DerivedFieldValueFetcher; | ||
import org.opensearch.search.lookup.LeafSearchLookup; | ||
import org.opensearch.search.lookup.SearchLookup; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import java.util.function.Function; | ||
|
||
/** | ||
* DerivedFieldQuery used for querying derived fields. It contains the logic to execute an input lucene query against | ||
* DerivedField. It also accepts DerivedFieldValueFetcher and SearchLookup as an input. | ||
*/ | ||
public final class DerivedFieldQuery extends Query { | ||
private final Query query; | ||
private final DerivedFieldValueFetcher valueFetcher; | ||
private final SearchLookup searchLookup; | ||
private final Function<Object, IndexableField> indexableFieldGenerator; | ||
private final Analyzer indexAnalyzer; | ||
|
||
/** | ||
* @param query lucene query to be executed against the derived field | ||
* @param valueFetcher DerivedFieldValueFetcher ValueFetcher to fetch the value of a derived field from _source | ||
* using LeafSearchLookup | ||
* @param searchLookup SearchLookup to get the LeafSearchLookup look used by valueFetcher to fetch the _source | ||
* @param indexableFieldGenerator used to generate lucene IndexableField from a given object fetched by valueFetcher | ||
* to be used in lucene memory index. | ||
*/ | ||
public DerivedFieldQuery( | ||
Query query, | ||
DerivedFieldValueFetcher valueFetcher, | ||
SearchLookup searchLookup, | ||
Function<Object, IndexableField> indexableFieldGenerator, | ||
Analyzer indexAnalyzer | ||
) { | ||
this.query = query; | ||
this.valueFetcher = valueFetcher; | ||
this.searchLookup = searchLookup; | ||
this.indexableFieldGenerator = indexableFieldGenerator; | ||
this.indexAnalyzer = indexAnalyzer; | ||
} | ||
|
||
@Override | ||
public void visit(QueryVisitor visitor) { | ||
query.visit(visitor); | ||
} | ||
|
||
@Override | ||
public Query rewrite(IndexSearcher indexSearcher) throws IOException { | ||
Query rewritten = indexSearcher.rewrite(query); | ||
if (rewritten == query) { | ||
return this; | ||
} | ||
return new DerivedFieldQuery(rewritten, valueFetcher, searchLookup, indexableFieldGenerator, indexAnalyzer); | ||
} | ||
|
||
@Override | ||
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { | ||
|
||
return new ConstantScoreWeight(this, boost) { | ||
@Override | ||
public Scorer scorer(LeafReaderContext context) { | ||
DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc()); | ||
valueFetcher.setNextReader(context); | ||
LeafSearchLookup leafSearchLookup = searchLookup.getLeafSearchLookup(context); | ||
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { | ||
@Override | ||
public boolean matches() { | ||
leafSearchLookup.source().setSegmentAndDocument(context, approximation.docID()); | ||
List<Object> values = valueFetcher.fetchValues(leafSearchLookup.source()); | ||
// TODO: in case of errors from script, should it be ignored and treated as missing field | ||
// by using a configurable setting? | ||
MemoryIndex memoryIndex = new MemoryIndex(); | ||
for (Object value : values) { | ||
memoryIndex.addField(indexableFieldGenerator.apply(value), indexAnalyzer); | ||
} | ||
float score = memoryIndex.search(query); | ||
return score > 0.0f; | ||
} | ||
|
||
@Override | ||
public float matchCost() { | ||
// TODO: how can we compute this? | ||
return 1000f; | ||
} | ||
}; | ||
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); | ||
} | ||
|
||
@Override | ||
public boolean isCacheable(LeafReaderContext ctx) { | ||
return false; | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) { | ||
return true; | ||
} | ||
if (sameClassAs(o) == false) { | ||
return false; | ||
} | ||
DerivedFieldQuery other = (DerivedFieldQuery) o; | ||
return Objects.equals(this.query, other.query) | ||
&& Objects.equals(this.valueFetcher, other.valueFetcher) | ||
&& Objects.equals(this.searchLookup, other.searchLookup) | ||
&& Objects.equals(this.indexableFieldGenerator, other.indexableFieldGenerator) | ||
&& Objects.equals(this.indexAnalyzer, other.indexAnalyzer); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(classHash(), query, valueFetcher, searchLookup, indexableFieldGenerator, indexableFieldGenerator); | ||
} | ||
|
||
@Override | ||
public String toString(String f) { | ||
return "DerivedFieldQuery (Query: [ " + query.toString(f) + "])"; | ||
} | ||
} |
104 changes: 104 additions & 0 deletions
104
server/src/main/java/org/opensearch/script/DerivedFieldScript.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.script; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.opensearch.common.logging.DeprecationLogger; | ||
import org.opensearch.index.fielddata.ScriptDocValues; | ||
import org.opensearch.search.lookup.LeafSearchLookup; | ||
import org.opensearch.search.lookup.SearchLookup; | ||
import org.opensearch.search.lookup.SourceLookup; | ||
|
||
import java.io.IOException; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.function.Function; | ||
|
||
/** | ||
* Definition of Script for DerivedField. | ||
* It will be used to execute scripts defined against derived fields of any type | ||
* | ||
* @opensearch.internal | ||
*/ | ||
public abstract class DerivedFieldScript { | ||
|
||
public static final String[] PARAMETERS = {}; | ||
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("derived_field", Factory.class); | ||
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class); | ||
|
||
private static final Map<String, Function<Object, Object>> PARAMS_FUNCTIONS = Map.of( | ||
"doc", | ||
value -> value, | ||
"_source", | ||
value -> ((SourceLookup) value).loadSourceIfNeeded() | ||
); | ||
|
||
/** | ||
* The generic runtime parameters for the script. | ||
*/ | ||
private final Map<String, Object> params; | ||
|
||
/** | ||
* A leaf lookup for the bound segment this script will operate on. | ||
*/ | ||
private final LeafSearchLookup leafLookup; | ||
|
||
public DerivedFieldScript(Map<String, Object> params, SearchLookup lookup, LeafReaderContext leafContext) { | ||
Map<String, Object> parameters = new HashMap<>(params); | ||
this.leafLookup = lookup.getLeafSearchLookup(leafContext); | ||
parameters.putAll(leafLookup.asMap()); | ||
this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS); | ||
} | ||
|
||
protected DerivedFieldScript() { | ||
params = null; | ||
leafLookup = null; | ||
} | ||
|
||
/** | ||
* Return the parameters for this script. | ||
*/ | ||
public Map<String, Object> getParams() { | ||
return params; | ||
} | ||
|
||
/** | ||
* The doc lookup for the Lucene segment this script was created for. | ||
*/ | ||
public Map<String, ScriptDocValues<?>> getDoc() { | ||
return leafLookup.doc(); | ||
} | ||
|
||
/** | ||
* Set the current document to run the script on next. | ||
*/ | ||
public void setDocument(int docid) { | ||
leafLookup.setDocument(docid); | ||
} | ||
|
||
public abstract Object execute(); | ||
|
||
/** | ||
* A factory to construct {@link DerivedFieldScript} instances. | ||
* | ||
* @opensearch.internal | ||
*/ | ||
public interface LeafFactory { | ||
DerivedFieldScript newInstance(LeafReaderContext ctx) throws IOException; | ||
} | ||
|
||
/** | ||
* A factory to construct stateful {@link DerivedFieldScript} factories for a specific index. | ||
* | ||
* @opensearch.internal | ||
*/ | ||
public interface Factory extends ScriptFactory { | ||
LeafFactory newFactory(Map<String, Object> params, SearchLookup lookup); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.