-
Notifications
You must be signed in to change notification settings - Fork 25.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Synthetic _source: support match_only_text #89516
Merged
nik9000
merged 14 commits into
elastic:main
from
nik9000:synthetic_source_match_only_text
Sep 1, 2022
Merged
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
a95991c
Synthetic _source: support match_only_text
nik9000 2899228
Words
nik9000 4d2f0b6
Update docs/changelog/89516.yaml
nik9000 2e268c3
Fixup
nik9000 0b242b9
Fixup
nik9000 0d9fe2d
More tests for enrich processor
nik9000 7c3bb3f
Merge branch 'main' into synthetic_source_match_only_text
nik9000 1b08a25
Clean
nik9000 6cfda5b
Merge branch 'main' into synthetic_source_match_only_text
nik9000 0495296
Merge branch 'main' into synthetic_source_match_only_text
nik9000 8998678
Update
nik9000 373a229
Merge branch 'main' into synthetic_source_match_only_text
nik9000 6c27a51
Compile after merge
nik9000 67319d8
Fixup
nik9000 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 89516 | ||
summary: "Synthetic _source: support `match_only_text`" | ||
area: "TSDB" | ||
type: enhancement | ||
issues: [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,20 +9,28 @@ | |
package org.elasticsearch.index.mapper.extras; | ||
|
||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.index.DirectoryReader; | ||
import org.apache.lucene.index.DocValuesType; | ||
import org.apache.lucene.index.IndexOptions; | ||
import org.apache.lucene.index.IndexableField; | ||
import org.apache.lucene.index.IndexableFieldType; | ||
import org.apache.lucene.search.TopDocs; | ||
import org.apache.lucene.search.TotalHits; | ||
import org.apache.lucene.store.Directory; | ||
import org.apache.lucene.tests.analysis.CannedTokenStream; | ||
import org.apache.lucene.tests.analysis.Token; | ||
import org.apache.lucene.tests.index.RandomIndexWriter; | ||
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.core.Tuple; | ||
import org.elasticsearch.index.mapper.DocumentMapper; | ||
import org.elasticsearch.index.mapper.KeywordFieldMapper; | ||
import org.elasticsearch.index.mapper.LuceneDocument; | ||
import org.elasticsearch.index.mapper.MappedFieldType; | ||
import org.elasticsearch.index.mapper.MapperParsingException; | ||
import org.elasticsearch.index.mapper.MapperService; | ||
import org.elasticsearch.index.mapper.MapperTestCase; | ||
import org.elasticsearch.index.mapper.ParsedDocument; | ||
import org.elasticsearch.index.query.MatchPhraseQueryBuilder; | ||
import org.elasticsearch.index.query.SearchExecutionContext; | ||
import org.elasticsearch.plugins.Plugin; | ||
import org.elasticsearch.xcontent.XContentBuilder; | ||
|
@@ -34,6 +42,7 @@ | |
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.hamcrest.Matchers.containsString; | ||
import static org.hamcrest.Matchers.equalTo; | ||
|
@@ -51,10 +60,37 @@ protected Object getSampleValueForDocument() { | |
return "value"; | ||
} | ||
|
||
public final void testExists() throws IOException { | ||
MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); })); | ||
assertExistsQuery(mapperService); | ||
assertParseMinimalWarnings(); | ||
public void testExistsStandardSource() throws IOException { | ||
assertExistsQuery(createMapperService(testMapping(false))); | ||
} | ||
|
||
// Runs assertExistsQuery against a mapper service built from testMapping(true), the synthetic-source variant. | ||
public void testExistsSyntheticSource() throws IOException { | ||
assertExistsQuery(createMapperService(testMapping(true))); | ||
} | ||
|
||
// Runs assertPhraseQuery against a mapper service built from testMapping(false), the non-synthetic variant. | ||
public void testPhraseQueryStandardSource() throws IOException { | ||
assertPhraseQuery(createMapperService(testMapping(false))); | ||
} | ||
|
||
// Runs assertPhraseQuery against a mapper service built from testMapping(true), the synthetic-source variant. | ||
public void testPhraseQuerySyntheticSource() throws IOException { | ||
assertPhraseQuery(createMapperService(testMapping(true))); | ||
} | ||
|
||
// Indexes one document whose "field" value is "the quick brown fox", then checks that a | ||
// match_phrase query for "brown fox" on "field" returns exactly that document. | ||
// The supplied mapperService is used both to parse the document and to build the search context. | ||
private void assertPhraseQuery(MapperService mapperService) throws IOException { | ||
try (Directory directory = newDirectory()) { | ||
RandomIndexWriter iw = new RandomIndexWriter(random(), directory); | ||
LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox"))).rootDoc(); | ||
iw.addDocument(doc); | ||
// The writer is closed before the reader below is opened on the same directory. | ||
iw.close(); | ||
try (DirectoryReader reader = DirectoryReader.open(directory)) { | ||
SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader)); | ||
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox"); | ||
// Only the top hit is requested. | ||
TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1); | ||
// Expect an exact total of one hit, and that hit must be doc id 0 (the only document indexed). | ||
assertThat(docs.totalHits.value, equalTo(1L)); | ||
assertThat(docs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO)); | ||
assertThat(docs.scoreDocs[0].doc, equalTo(0)); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
|
@@ -65,6 +101,13 @@ protected void registerParameters(ParameterChecker checker) throws IOException { | |
); | ||
} | ||
|
||
// Builds the mapping used by the exists and phrase-query tests. | ||
// When syntheticSource is true, the mapping comes from syntheticSourceMapping and declares an | ||
// object "field" of type match_only_text; otherwise it comes from fieldMapping with the same type. | ||
private XContentBuilder testMapping(boolean syntheticSource) throws IOException { | ||
if (syntheticSource) { | ||
return syntheticSourceMapping(b -> b.startObject("field").field("type", "match_only_text").endObject()); | ||
} | ||
return fieldMapping(b -> b.field("type", "match_only_text")); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think I'd be happier with coverage here if we explicitly ran both tests for the synthetic and non-synthetic cases. At the moment this feels like we're only testing 50% of the functionality on each run, and I think that will come back to bite us. |
||
@Override | ||
protected void minimalMapping(XContentBuilder b) throws IOException { | ||
b.field("type", "match_only_text"); | ||
|
@@ -166,7 +209,36 @@ protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException | |
|
||
@Override | ||
protected SyntheticSourceSupport syntheticSourceSupport() { | ||
throw new AssumptionViolatedException("not supported"); | ||
return new MatchOnlyTextSyntheticSourceSupport(); | ||
} | ||
|
||
// SyntheticSourceSupport implementation for match_only_text fields: produces random examples | ||
// whose expected synthetic-source output equals the input text, and declares no invalid examples. | ||
static class MatchOnlyTextSyntheticSourceSupport implements SyntheticSourceSupport { | ||
// Returns either a single-value example or a multi-value example, chosen by randomBoolean(). | ||
@Override | ||
public SyntheticSourceExample example(int maxValues) { | ||
if (randomBoolean()) { | ||
// Single value: input and expected output come from the same generated tuple. | ||
Tuple<String, String> v = generateValue(); | ||
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping); | ||
} | ||
// Multi-value: randomList(1, maxValues, ...) presumably yields between 1 and maxValues tuples. | ||
List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue); | ||
List<String> in = values.stream().map(Tuple::v1).toList(); | ||
List<String> outList = values.stream().map(Tuple::v2).toList(); | ||
// A one-element output list is unwrapped to the bare value; the input is still passed as a list. | ||
Object out = outList.size() == 1 ? outList.get(0) : outList; | ||
return new SyntheticSourceExample(in, out, this::mapping); | ||
} | ||
|
||
// Generates one text value by joining random words with single spaces (presumably 1 to 10 | ||
// alphabetic words of length 5) and returns it as both tuple members (input and expected output). | ||
private Tuple<String, String> generateValue() { | ||
String v = randomList(1, 10, () -> randomAlphaOfLength(5)).stream().collect(Collectors.joining(" ")); | ||
return Tuple.tuple(v, v); | ||
} | ||
|
||
// Writes the field mapping used by the examples: only the type, match_only_text. | ||
private void mapping(XContentBuilder b) throws IOException { | ||
b.field("type", "match_only_text"); | ||
} | ||
|
||
// No invalid examples are defined for this field type. | ||
@Override | ||
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException { | ||
return List.of(); | ||
} | ||
} | ||
|
||
@Override | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a stored field lookup thing in
searchExecutionContext.lookup()
but it can't be convinced to load the hidden stored field. If we feel strongly about it I can try and integrate into it, but I'm not super sure how at the moment. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah that's strictly for scripts and is integrated poorly with other stored field lookup stuff, so I don't think it's worth trying to re-use it for the moment. I do think that the document lookup API I'm playing with at the moment will improve this though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It has a lovely caching mechanism that I think could be quite nice. If multiple queries need to recheck the source it'll load once while this won't.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, somewhat annoyingly, I don't think we will be able to re-use a stored field loader from the SearchLookup here because the underlying API expects a LeafReaderContext, not a LeafSearchLookup. But you should be able to use a LeafStoredFieldLoader rather than a FieldVisitor here which will at least be more readable.