elastic · nik9000 · Sep 1, 2022 · Aug 22, 2022 · Aug 22, 2022 · Aug 22, 2022
diff --git a/docs/changelog/89516.yaml b/docs/changelog/89516.yaml
@@ -0,0 +1,5 @@
+pr: 89516
+summary: "Synthetic _source: support `match_only_text`"
+area: "TSDB"
+type: enhancement
+issues: []
diff --git a/...-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/...-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
@@ -12,6 +12,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
@@ -34,11 +35,15 @@
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
+import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
+import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
 import org.elasticsearch.index.mapper.DocumentParserContext;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MapperBuilderContext;
+import org.elasticsearch.index.mapper.SourceLoader;
 import org.elasticsearch.index.mapper.SourceValueFetcher;
 import org.elasticsearch.index.mapper.StringFieldType;
+import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
 import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
 import org.elasticsearch.index.mapper.TextParams;
@@ -48,6 +53,7 @@
 import org.elasticsearch.script.field.TextDocValuesField;
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 import org.elasticsearch.search.lookup.SourceLookup;
+import org.elasticsearch.xcontent.XContentBuilder;
 
 import java.io.IOException;
 import java.io.UncheckedIOException;
@@ -56,6 +62,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.function.Function;
 
 /**
@@ -171,6 +178,17 @@ private Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException
                     "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
                 );
             }
+            if (searchExecutionContext.isSourceSynthetic()) {
+                String name = storedFieldNameForSyntheticSource();
+                StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
+                return context -> {
+                    LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
+                    return docId -> {
+                        leafLoader.advanceTo(docId);
+                        return leafLoader.storedFields().get(name);
+                    };
+                };
+            }
             SourceLookup sourceLookup = searchExecutionContext.lookup().source();
             ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
             return context -> {
@@ -292,6 +310,9 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
             throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
         }
 
+        private String storedFieldNameForSyntheticSource() {
+            return name() + "._original";
+        }
     }
 
     private final Version indexCreatedVersion;
@@ -339,6 +360,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
         Field field = new Field(fieldType().name(), value, fieldType);
         context.doc().add(field);
         context.addToFieldNames(fieldType().name());
+
+        if (context.isSyntheticSource()) {
+            context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
+        }
     }
 
     @Override
@@ -351,4 +376,18 @@ public MatchOnlyTextFieldType fieldType() {
         return (MatchOnlyTextFieldType) super.fieldType();
     }
 
+    @Override
+    public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
+        if (copyTo.copyToFields().isEmpty() != true) {
+            throw new IllegalArgumentException(
+                "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
+            );
+        }
+        return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName(), null) {
+            @Override
+            protected void write(XContentBuilder b, Object value) throws IOException {
+                b.value((String) value);
+            }
+        };
+    }
 }
diff --git a/...as/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/...as/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java
@@ -9,20 +9,28 @@
 package org.elasticsearch.index.mapper.extras;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHits;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.CannedTokenStream;
 import org.apache.lucene.tests.analysis.Token;
+import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.LuceneDocument;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.mapper.MapperTestCase;
 import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.xcontent.XContentBuilder;
@@ -34,6 +42,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.util.stream.Collectors;
 
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
@@ -51,10 +60,37 @@ protected Object getSampleValueForDocument() {
         return "value";
     }
 
-    public final void testExists() throws IOException {
-        MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); }));
-        assertExistsQuery(mapperService);
-        assertParseMinimalWarnings();
+    public void testExistsStandardSource() throws IOException {
+        assertExistsQuery(createMapperService(testMapping(false)));
+    }
+
+    public void testExistsSyntheticSource() throws IOException {
+        assertExistsQuery(createMapperService(testMapping(true)));
+    }
+
+    public void testPhraseQueryStandardSource() throws IOException {
+        assertPhraseQuery(createMapperService(testMapping(false)));
+    }
+
+    public void testPhraseQuerySyntheticSource() throws IOException {
+        assertPhraseQuery(createMapperService(testMapping(true)));
+    }
+
+    private void assertPhraseQuery(MapperService mapperService) throws IOException {
+        try (Directory directory = newDirectory()) {
+            RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
+            LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox"))).rootDoc();
+            iw.addDocument(doc);
+            iw.close();
+            try (DirectoryReader reader = DirectoryReader.open(directory)) {
+                SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader));
+                MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox");
+                TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1);
+                assertThat(docs.totalHits.value, equalTo(1L));
+                assertThat(docs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
+                assertThat(docs.scoreDocs[0].doc, equalTo(0));
+            }
+        }
     }
 
     @Override
@@ -65,6 +101,13 @@ protected void registerParameters(ParameterChecker checker) throws IOException {
         );
     }
 
+    private XContentBuilder testMapping(boolean syntheticSource) throws IOException {
+        if (syntheticSource) {
+            return syntheticSourceMapping(b -> b.startObject("field").field("type", "match_only_text").endObject());
+        }
+        return fieldMapping(b -> b.field("type", "match_only_text"));
+    }
+
     @Override
     protected void minimalMapping(XContentBuilder b) throws IOException {
         b.field("type", "match_only_text");
@@ -166,7 +209,36 @@ protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException
 
     @Override
     protected SyntheticSourceSupport syntheticSourceSupport() {
-        throw new AssumptionViolatedException("not supported");
+        return new MatchOnlyTextSyntheticSourceSupport();
+    }
+
+    static class MatchOnlyTextSyntheticSourceSupport implements SyntheticSourceSupport {
+        @Override
+        public SyntheticSourceExample example(int maxValues) {
+            if (randomBoolean()) {
+                Tuple<String, String> v = generateValue();
+                return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
+            }
+            List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue);
+            List<String> in = values.stream().map(Tuple::v1).toList();
+            List<String> outList = values.stream().map(Tuple::v2).toList();
+            Object out = outList.size() == 1 ? outList.get(0) : outList;
+            return new SyntheticSourceExample(in, out, this::mapping);
+        }
+
+        private Tuple<String, String> generateValue() {
+            String v = randomList(1, 10, () -> randomAlphaOfLength(5)).stream().collect(Collectors.joining(" "));
+            return Tuple.tuple(v, v);
+        }
+
+        private void mapping(XContentBuilder b) throws IOException {
+            b.field("type", "match_only_text");
+        }
+
+        @Override
+        public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
+            return List.of();
+        }
     }
 
     @Override

diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java
@@ -84,6 +84,9 @@ public Set<String> getFieldNames() {
     public final void postProcess(Function<String, MappedFieldType> fieldTypeLookup) {
         for (Map.Entry<String, List<Object>> entry : fields().entrySet()) {
             MappedFieldType fieldType = fieldTypeLookup.apply(entry.getKey());
+            if (fieldType == null) {
+                continue; // TODO this is lame
+            }
             List<Object> fieldValues = entry.getValue();
             for (int i = 0; i < fieldValues.size(); i++) {
                 fieldValues.set(i, fieldType.valueForDisplay(fieldValues.get(i)));

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java
@@ -404,11 +404,22 @@ public boolean hasMappings() {
         return this != EMPTY;
     }
 
+    /**
+     * Will there be {@code _source}.
+     */
     public boolean isSourceEnabled() {
         SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class);
         return sfm != null && sfm.enabled();
     }
 
+    /**
+     * Does the source need to be rebuilt on the fly?
+     */
+    public boolean isSourceSynthetic() {
+        SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class);
+        return sfm != null && sfm.isSynthetic();
+    }
+
     /**
      * Build something to load source {@code _source}.
      */

diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java
@@ -392,10 +392,20 @@ public Set<String> sourcePath(String fullName) {
         return mappingLookup.sourcePaths(fullName);
     }
 
+    /**
+     * Will there be {@code _source}.
+     */
     public boolean isSourceEnabled() {
         return mappingLookup.isSourceEnabled();
     }
 
+    /**
+     * Does the source need to be rebuilt on the fly?
+     */
+    public boolean isSourceSynthetic() {
+        return mappingLookup.isSourceSynthetic();
+    }
+
     /**
      * Build something to load source {@code _source}.
      */