diff --git a/docs/changelog/89516.yaml b/docs/changelog/89516.yaml new file mode 100644 index 0000000000000..0fd37b69361cd --- /dev/null +++ b/docs/changelog/89516.yaml @@ -0,0 +1,5 @@ +pr: 89516 +summary: "Synthetic _source: support `match_only_text`" +area: "TSDB" +type: enhancement +issues: [] diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index e1ea8690dc572..26929a62d9766 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -12,6 +12,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -34,11 +35,15 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; +import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; +import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.StringFieldType; +import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader; import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; 
import org.elasticsearch.index.mapper.TextParams; @@ -48,6 +53,7 @@ import org.elasticsearch.script.field.TextDocValuesField; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.lookup.SourceLookup; +import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.io.UncheckedIOException; @@ -56,6 +62,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.function.Function; /** @@ -171,6 +178,17 @@ private Function, IOException "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." ); } + if (searchExecutionContext.isSourceSynthetic()) { + String name = storedFieldNameForSyntheticSource(); + StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name)); + return context -> { + LeafStoredFieldLoader leafLoader = loader.getLoader(context, null); + return docId -> { + leafLoader.advanceTo(docId); + return leafLoader.storedFields().get(name); + }; + }; + } SourceLookup sourceLookup = searchExecutionContext.lookup().source(); ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null); return context -> { @@ -292,6 +310,9 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations"); } + private String storedFieldNameForSyntheticSource() { + return name() + "._original"; + } } private final Version indexCreatedVersion; @@ -339,6 +360,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio Field field = new Field(fieldType().name(), value, fieldType); context.doc().add(field); context.addToFieldNames(fieldType().name()); + + if (context.isSyntheticSource()) { + context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value)); + } } @Override @@ -351,4 +376,18 @@ public 
MatchOnlyTextFieldType fieldType() { return (MatchOnlyTextFieldType) super.fieldType(); } + @Override + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { + if (copyTo.copyToFields().isEmpty() != true) { + throw new IllegalArgumentException( + "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" + ); + } + return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName(), null) { + @Override + protected void write(XContentBuilder b, Object value) throws IOException { + b.value((String) value); + } + }; + } } diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index 6a7270fe8f5bc..1179c03a35ab3 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -9,20 +9,28 @@ package org.elasticsearch.index.mapper.extras; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.store.Directory; import org.apache.lucene.tests.analysis.CannedTokenStream; import org.apache.lucene.tests.analysis.Token; +import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; +import 
org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xcontent.XContentBuilder; @@ -34,6 +42,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -51,10 +60,37 @@ protected Object getSampleValueForDocument() { return "value"; } - public final void testExists() throws IOException { - MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); })); - assertExistsQuery(mapperService); - assertParseMinimalWarnings(); + public void testExistsStandardSource() throws IOException { + assertExistsQuery(createMapperService(testMapping(false))); + } + + public void testExistsSyntheticSource() throws IOException { + assertExistsQuery(createMapperService(testMapping(true))); + } + + public void testPhraseQueryStandardSource() throws IOException { + assertPhraseQuery(createMapperService(testMapping(false))); + } + + public void testPhraseQuerySyntheticSource() throws IOException { + assertPhraseQuery(createMapperService(testMapping(true))); + } + + private void assertPhraseQuery(MapperService mapperService) throws IOException { + try (Directory directory = newDirectory()) { + RandomIndexWriter iw = new RandomIndexWriter(random(), directory); + LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox"))).rootDoc(); + iw.addDocument(doc); + iw.close(); + try (DirectoryReader reader 
= DirectoryReader.open(directory)) { + SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader)); + MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox"); + TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1); + assertThat(docs.totalHits.value, equalTo(1L)); + assertThat(docs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(docs.scoreDocs[0].doc, equalTo(0)); + } + } } @Override @@ -65,6 +101,13 @@ protected void registerParameters(ParameterChecker checker) throws IOException { ); } + private XContentBuilder testMapping(boolean syntheticSource) throws IOException { + if (syntheticSource) { + return syntheticSourceMapping(b -> b.startObject("field").field("type", "match_only_text").endObject()); + } + return fieldMapping(b -> b.field("type", "match_only_text")); + } + @Override protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "match_only_text"); @@ -166,7 +209,36 @@ protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException @Override protected SyntheticSourceSupport syntheticSourceSupport() { - throw new AssumptionViolatedException("not supported"); + return new MatchOnlyTextSyntheticSourceSupport(); + } + + static class MatchOnlyTextSyntheticSourceSupport implements SyntheticSourceSupport { + @Override + public SyntheticSourceExample example(int maxValues) { + if (randomBoolean()) { + Tuple v = generateValue(); + return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping); + } + List> values = randomList(1, maxValues, this::generateValue); + List in = values.stream().map(Tuple::v1).toList(); + List outList = values.stream().map(Tuple::v2).toList(); + Object out = outList.size() == 1 ? 
outList.get(0) : outList; + return new SyntheticSourceExample(in, out, this::mapping); + } + + private Tuple generateValue() { + String v = randomList(1, 10, () -> randomAlphaOfLength(5)).stream().collect(Collectors.joining(" ")); + return Tuple.tuple(v, v); + } + + private void mapping(XContentBuilder b) throws IOException { + b.field("type", "match_only_text"); + } + + @Override + public List invalidExample() throws IOException { + return List.of(); + } } @Override diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java index b537c35001f20..8b04b2f3013ea 100644 --- a/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java +++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java @@ -84,6 +84,9 @@ public Set getFieldNames() { public final void postProcess(Function fieldTypeLookup) { for (Map.Entry> entry : fields().entrySet()) { MappedFieldType fieldType = fieldTypeLookup.apply(entry.getKey()); + if (fieldType == null) { + continue; // TODO: find a cleaner way to skip stored fields that have no mapped field type + } List fieldValues = entry.getValue(); for (int i = 0; i < fieldValues.size(); i++) { fieldValues.set(i, fieldType.valueForDisplay(fieldValues.get(i))); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index ee15f5e273fa0..538e5b4a0694a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -404,11 +404,22 @@ public boolean hasMappings() { return this != EMPTY; } + /** + * Returns whether the {@code _source} field mapper is present and enabled. + */ public boolean isSourceEnabled() { SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class); return sfm != null && sfm.enabled(); } + /** + * Returns whether {@code _source} is synthetic, i.e. needs to be rebuilt on the fly. 
+ */ + public boolean isSourceSynthetic() { + SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class); + return sfm != null && sfm.isSynthetic(); + } + /** * Build something to load source {@code _source}. */ diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java index ae42e2ba366eb..c07ae53d910f5 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java @@ -392,10 +392,20 @@ public Set sourcePath(String fullName) { return mappingLookup.sourcePaths(fullName); } + /** + * Returns whether {@code _source} is enabled, as reported by the mapping lookup. + */ public boolean isSourceEnabled() { return mappingLookup.isSourceEnabled(); } + /** + * Returns whether {@code _source} is synthetic, i.e. needs to be rebuilt on the fly, as reported by the mapping lookup. + */ + public boolean isSourceSynthetic() { + return mappingLookup.isSourceSynthetic(); + } + /** * Build something to load source {@code _source}. */