Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synthetic _source: support match_only_text #89516

Merged
merged 14 commits into from
Sep 1, 2022
5 changes: 5 additions & 0 deletions docs/changelog/89516.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 89516
summary: "Synthetic _source: support `match_only_text`"
area: "TSDB"
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
Expand All @@ -34,11 +35,15 @@
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.mapper.TextParams;
Expand All @@ -48,6 +53,7 @@
import org.elasticsearch.script.field.TextDocValuesField;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.SourceLookup;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.io.UncheckedIOException;
Expand All @@ -56,6 +62,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

/**
Expand Down Expand Up @@ -171,6 +178,17 @@ private Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException
"Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
);
}
if (searchExecutionContext.isSourceSynthetic()) {
String name = storedFieldNameForSyntheticSource();
StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
return context -> {
LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
return docId -> {
leafLoader.advanceTo(docId);
return leafLoader.storedFields().get(name);
};
};
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a stored field lookup thing in searchExecutionContext.lookup() but it can't be convinced to load the hidden stored field. If we feel strongly about it I can try and integrate into it, but I'm not super sure how at the moment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's strictly for scripts and is integrated poorly with other stored field lookup stuff, so I don't think it's worth trying to re-use it for the moment. I do think that the document lookup API I'm playing with at the moment will improve this though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It has a lovely caching mechanism that I think could be quite nice. If multiple queries need to recheck the source it'll load once while this won't.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, somewhat annoyingly, I don't think we will be able to re-use a stored field loader from the SearchLookup here because the underlying API expects a LeafReaderContext, not a LeafSearchLookup. But you should be able to use a LeafStoredFieldLoader rather than a FieldVisitor here which will at least be more readable.

}
SourceLookup sourceLookup = searchExecutionContext.lookup().source();
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
return context -> {
Expand Down Expand Up @@ -292,6 +310,9 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
}

private String storedFieldNameForSyntheticSource() {
return name() + "._original";
}
}

private final Version indexCreatedVersion;
Expand Down Expand Up @@ -339,6 +360,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
Field field = new Field(fieldType().name(), value, fieldType);
context.doc().add(field);
context.addToFieldNames(fieldType().name());

if (context.isSyntheticSource()) {
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
}
}

@Override
Expand All @@ -351,4 +376,18 @@ public MatchOnlyTextFieldType fieldType() {
return (MatchOnlyTextFieldType) super.fieldType();
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
if (copyTo.copyToFields().isEmpty() != true) {
throw new IllegalArgumentException(
"field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
);
}
return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName(), null) {
@Override
protected void write(XContentBuilder b, Object value) throws IOException {
b.value((String) value);
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,28 @@
package org.elasticsearch.index.mapper.extras;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MapperTestCase;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.xcontent.XContentBuilder;
Expand All @@ -34,6 +42,7 @@
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -51,10 +60,37 @@ protected Object getSampleValueForDocument() {
return "value";
}

public final void testExists() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); }));
assertExistsQuery(mapperService);
assertParseMinimalWarnings();
public void testExistsStandardSource() throws IOException {
assertExistsQuery(createMapperService(testMapping(false)));
}

public void testExistsSyntheticSource() throws IOException {
assertExistsQuery(createMapperService(testMapping(true)));
}

public void testPhraseQueryStandardSource() throws IOException {
assertPhraseQuery(createMapperService(testMapping(false)));
}

public void testPhraseQuerySyntheticSource() throws IOException {
assertPhraseQuery(createMapperService(testMapping(true)));
}

private void assertPhraseQuery(MapperService mapperService) throws IOException {
try (Directory directory = newDirectory()) {
RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox"))).rootDoc();
iw.addDocument(doc);
iw.close();
try (DirectoryReader reader = DirectoryReader.open(directory)) {
SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader));
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox");
TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1);
assertThat(docs.totalHits.value, equalTo(1L));
assertThat(docs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
}
}

@Override
Expand All @@ -65,6 +101,13 @@ protected void registerParameters(ParameterChecker checker) throws IOException {
);
}

private XContentBuilder testMapping(boolean syntheticSource) throws IOException {
if (syntheticSource) {
return syntheticSourceMapping(b -> b.startObject("field").field("type", "match_only_text").endObject());
}
return fieldMapping(b -> b.field("type", "match_only_text"));
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'd be happier with coverage here if we explicitly run both tests for the synthetic and non-synthetic case, this feels at the moment like we're only testing 50% of the functionality on each run and I think that will come back to bite us.

@Override
protected void minimalMapping(XContentBuilder b) throws IOException {
b.field("type", "match_only_text");
Expand Down Expand Up @@ -166,7 +209,36 @@ protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException

@Override
protected SyntheticSourceSupport syntheticSourceSupport() {
throw new AssumptionViolatedException("not supported");
return new MatchOnlyTextSyntheticSourceSupport();
}

static class MatchOnlyTextSyntheticSourceSupport implements SyntheticSourceSupport {
@Override
public SyntheticSourceExample example(int maxValues) {
if (randomBoolean()) {
Tuple<String, String> v = generateValue();
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
}
List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue);
List<String> in = values.stream().map(Tuple::v1).toList();
List<String> outList = values.stream().map(Tuple::v2).toList();
Object out = outList.size() == 1 ? outList.get(0) : outList;
return new SyntheticSourceExample(in, out, this::mapping);
}

private Tuple<String, String> generateValue() {
String v = randomList(1, 10, () -> randomAlphaOfLength(5)).stream().collect(Collectors.joining(" "));
return Tuple.tuple(v, v);
}

private void mapping(XContentBuilder b) throws IOException {
b.field("type", "match_only_text");
}

@Override
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
return List.of();
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ public Set<String> getFieldNames() {
public final void postProcess(Function<String, MappedFieldType> fieldTypeLookup) {
for (Map.Entry<String, List<Object>> entry : fields().entrySet()) {
MappedFieldType fieldType = fieldTypeLookup.apply(entry.getKey());
if (fieldType == null) {
continue; // TODO this is lame
}
List<Object> fieldValues = entry.getValue();
for (int i = 0; i < fieldValues.size(); i++) {
fieldValues.set(i, fieldType.valueForDisplay(fieldValues.get(i)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,11 +404,22 @@ public boolean hasMappings() {
return this != EMPTY;
}

/**
* Will there be {@code _source}.
*/
public boolean isSourceEnabled() {
SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class);
return sfm != null && sfm.enabled();
}

/**
* Does the source need to be rebuilt on the fly?
*/
public boolean isSourceSynthetic() {
SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class);
return sfm != null && sfm.isSynthetic();
}

/**
* Build something to load source {@code _source}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,10 +392,20 @@ public Set<String> sourcePath(String fullName) {
return mappingLookup.sourcePaths(fullName);
}

/**
* Will there be {@code _source}.
*/
public boolean isSourceEnabled() {
return mappingLookup.isSourceEnabled();
}

/**
* Does the source need to be rebuilt on the fly?
*/
public boolean isSourceSynthetic() {
return mappingLookup.isSourceSynthetic();
}

/**
* Build something to load source {@code _source}.
*/
Expand Down