From a95991cea093dec5ce668fdb42eec19d0ffcc6a3 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 22 Aug 2022 14:01:25 -0400 Subject: [PATCH 01/10] Synthetic _source: support match_only_text This adds support for synthetic `_source` to the `match_only_text` field type. When synthetic `_source` is enabled `match_only_text` fields create a hidden stored field to contain their text. This should have similar or better search performance for this specific field type, though it will have slightly worse indexing performance because synthetic `_source` is still writing `_recovery_source`, which means we're writing the bits for this field twice. --- .../extras/MatchOnlyTextFieldMapper.java | 62 +++++++++++++++++ .../extras/MatchOnlyTextFieldMapperTests.java | 68 ++++++++++++++++++- .../index/fieldvisitor/FieldsVisitor.java | 3 + .../index/mapper/DocumentParserContext.java | 8 +++ .../index/mapper/MappingLookup.java | 11 +++ .../index/query/SearchExecutionContext.java | 10 +++ 6 files changed, 159 insertions(+), 3 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index a8d73c89700ee..ab58d5ade1f3b 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -12,8 +12,10 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.Intervals; import 
org.apache.lucene.queries.intervals.IntervalsSource; @@ -36,8 +38,10 @@ import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.StringFieldType; +import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader; import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; import org.elasticsearch.index.mapper.TextParams; @@ -78,6 +82,19 @@ public static class Defaults { } + /** + * The {@link FieldType} used to store "original" so that they can be + * rebuilt for synthetic source. + */ + private static final FieldType ORIGINAL_FIELD_TYPE = new FieldType(); + static { + ORIGINAL_FIELD_TYPE.setTokenized(false); + ORIGINAL_FIELD_TYPE.setOmitNorms(true); + ORIGINAL_FIELD_TYPE.setIndexOptions(IndexOptions.NONE); + ORIGINAL_FIELD_TYPE.setStored(true); + ORIGINAL_FIELD_TYPE.freeze(); + } + public static class Builder extends FieldMapper.Builder { private final Version indexCreatedVersion; @@ -168,6 +185,35 @@ private Function, IOException "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." 
); } + if (searchExecutionContext.isSourceSynthetic()) { + String name = originalFieldName(); + return context -> docID -> { + try { + List values = new ArrayList<>(); + context.reader().document(docID, new StoredFieldVisitor() { + private Status found = Status.NO; + + @Override + public Status needsField(FieldInfo fieldInfo) { + if (fieldInfo.name.equals(name)) { + found = Status.STOP; + return Status.YES; + } + return found; + } + + @Override + public void stringField(FieldInfo fieldInfo, String value) { + assert fieldInfo.name.equals(name); + values.add(value); + } + }); + return values; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + } SourceLookup sourceLookup = searchExecutionContext.lookup().source(); ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null); return context -> { @@ -279,6 +325,9 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations"); } + private String originalFieldName() { + return name() + "._original"; + } } private final Version indexCreatedVersion; @@ -326,6 +375,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio Field field = new Field(fieldType().name(), value, fieldType); context.doc().add(field); context.addToFieldNames(fieldType().name()); + + if (context.isSyntheticSource()) { + context.doc().add(new Field(fieldType().originalFieldName(), value, ORIGINAL_FIELD_TYPE)); + } } @Override @@ -338,4 +391,13 @@ public MatchOnlyTextFieldType fieldType() { return (MatchOnlyTextFieldType) super.fieldType(); } + @Override + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { + if (copyTo.copyToFields().isEmpty() != true) { + throw new IllegalArgumentException( + "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" + ); + } + return new 
StringStoredFieldFieldLoader(fieldType().originalFieldName(), simpleName()); + } } diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index 6a7270fe8f5bc..5a2bbc2973030 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -9,20 +9,28 @@ package org.elasticsearch.index.mapper.extras; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.store.Directory; import org.apache.lucene.tests.analysis.CannedTokenStream; import org.apache.lucene.tests.analysis.Token; +import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xcontent.XContentBuilder; @@ -34,6 +42,7 @@ import 
java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -52,9 +61,26 @@ protected Object getSampleValueForDocument() { } public final void testExists() throws IOException { - MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); })); + MapperService mapperService = createMapperService(testMapping()); assertExistsQuery(mapperService); - assertParseMinimalWarnings(); + } + + public final void testPhraseQuery() throws IOException { + MapperService mapperService = createMapperService(testMapping()); + try (Directory directory = newDirectory()) { + RandomIndexWriter iw = new RandomIndexWriter(random(), directory); + LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox"))).rootDoc(); + iw.addDocument(doc); + iw.close(); + try (DirectoryReader reader = DirectoryReader.open(directory)) { + SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader)); + MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox"); + TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1); + assertThat(docs.totalHits.value, equalTo(1L)); + assertThat(docs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(docs.scoreDocs[0].doc, equalTo(0)); + } + } } @Override @@ -65,6 +91,13 @@ protected void registerParameters(ParameterChecker checker) throws IOException { ); } + private XContentBuilder testMapping() throws IOException { + if (randomBoolean()) { + return syntheticSourceMapping(b -> b.startObject("field").field("type", "match_only_text").endObject()); + } + return fieldMapping(b -> b.field("type", "match_only_text")); + } + @Override protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "match_only_text"); @@ 
-166,7 +199,36 @@ protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException @Override protected SyntheticSourceSupport syntheticSourceSupport() { - throw new AssumptionViolatedException("not supported"); + return new MatchOnlyTextSyntheticSourceSupport(); + } + + static class MatchOnlyTextSyntheticSourceSupport implements SyntheticSourceSupport { + @Override + public SyntheticSourceExample example(int maxValues) { + if (randomBoolean()) { + Tuple v = generateValue(); + return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping); + } + List> values = randomList(1, maxValues, this::generateValue); + List in = values.stream().map(Tuple::v1).toList(); + List outList = values.stream().map(Tuple::v2).toList(); + Object out = outList.size() == 1 ? outList.get(0) : outList; + return new SyntheticSourceExample(in, out, this::mapping); + } + + private Tuple generateValue() { + String v = randomList(1, 10, () -> randomAlphaOfLength(5)).stream().collect(Collectors.joining(" ")); + return Tuple.tuple(v, v); + } + + private void mapping(XContentBuilder b) throws IOException { + b.field("type", "match_only_text"); + } + + @Override + public List invalidExample() throws IOException { + return List.of(); + } } @Override diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java index b537c35001f20..8b04b2f3013ea 100644 --- a/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java +++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/FieldsVisitor.java @@ -84,6 +84,9 @@ public Set getFieldNames() { public final void postProcess(Function fieldTypeLookup) { for (Map.Entry> entry : fields().entrySet()) { MappedFieldType fieldType = fieldTypeLookup.apply(entry.getKey()); + if (fieldType == null) { + continue; // TODO this is lame + } List fieldValues = entry.getValue(); for (int i = 0; i < fieldValues.size(); i++) { 
fieldValues.set(i, fieldType.valueForDisplay(fieldValues.get(i))); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 600f45e83958d..a79bc9003ef75 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -436,6 +436,14 @@ public final DynamicTemplate findDynamicTemplate(String fieldName, DynamicTempla return null; } + /** + * Is this index configured to use synthetic source? + */ + public final boolean isSyntheticSource() { + SourceFieldMapper sft = mappingLookup.getMapping().getMetadataMapperByClass(SourceFieldMapper.class); + return sft == null ? false : sft.isSynthetic(); + } + // XContentParser that wraps an existing parser positioned on a value, // and a field name, and returns a stream that looks like { 'field' : 'value' } private static class CopyToParser extends FilterXContentParserWrapper { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index a45fa7ff0e248..fa303f2844d5f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -403,11 +403,22 @@ public boolean hasMappings() { return this != EMPTY; } + /** + * Will there be {@code _source} values. + */ public boolean isSourceEnabled() { SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class); return sfm != null && sfm.enabled(); } + /** + * Does the source need to be rebuilt on the fly? + */ + public boolean isSourceSynthetic() { + SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class); + return sfm != null && sfm.isSynthetic(); + } + /** * Build something to load source {@code _source}. 
*/ diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java index f1514a959a31a..ee3c674b20335 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java @@ -391,10 +391,20 @@ public Set sourcePath(String fullName) { return mappingLookup.sourcePaths(fullName); } + /** + * Will there be {@code _source} values. + */ public boolean isSourceEnabled() { return mappingLookup.isSourceEnabled(); } + /** + * Does the source need to be rebuilt on the fly? + */ + public boolean isSourceSynthetic() { + return mappingLookup.isSourceSynthetic(); + } + /** * Build something to load source {@code _source}. */ From 2899228e7505b680bbb1d04a5bb7fe5b4c02465d Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 22 Aug 2022 14:40:36 -0400 Subject: [PATCH 02/10] Words --- .../main/java/org/elasticsearch/index/mapper/MappingLookup.java | 2 +- .../org/elasticsearch/index/query/SearchExecutionContext.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index fa303f2844d5f..ea636cc71681b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -404,7 +404,7 @@ public boolean hasMappings() { } /** - * Will there be {@code _source} values. + * Will there be {@code _source}. 
*/ public boolean isSourceEnabled() { SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class); diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java index ee3c674b20335..77b1507861d0f 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java @@ -392,7 +392,7 @@ public Set sourcePath(String fullName) { } /** - * Will there be {@code _source} values. + * Will there be {@code _source}. */ public boolean isSourceEnabled() { return mappingLookup.isSourceEnabled(); From 4d2f0b6f0a849041294d5947f0453f2edc461656 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 22 Aug 2022 14:29:42 -0400 Subject: [PATCH 03/10] Update docs/changelog/89516.yaml --- docs/changelog/89516.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/89516.yaml diff --git a/docs/changelog/89516.yaml b/docs/changelog/89516.yaml new file mode 100644 index 0000000000000..c7215cad7681c --- /dev/null +++ b/docs/changelog/89516.yaml @@ -0,0 +1,5 @@ +pr: 89516 +summary: "Synthetic _source: support `match_only_text`" +area: "Mapping, TSDB" +type: enhancement +issues: [] From 2e268c30184a29966c61aa92b9d29189a4ac9385 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 22 Aug 2022 14:48:38 -0400 Subject: [PATCH 04/10] Fixup --- docs/changelog/89516.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/89516.yaml b/docs/changelog/89516.yaml index c7215cad7681c..0fd37b69361cd 100644 --- a/docs/changelog/89516.yaml +++ b/docs/changelog/89516.yaml @@ -1,5 +1,5 @@ pr: 89516 summary: "Synthetic _source: support `match_only_text`" -area: "Mapping, TSDB" +area: "TSDB" type: enhancement issues: [] From 0b242b949c978bcb542ac12bfb0a6e5f2af02985 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 22 
Aug 2022 16:23:42 -0400 Subject: [PATCH 05/10] Fixup --- .../extras/MatchOnlyTextFieldMapper.java | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index ab58d5ade1f3b..f0ed0fbc581c4 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -188,30 +188,26 @@ private Function, IOException if (searchExecutionContext.isSourceSynthetic()) { String name = originalFieldName(); return context -> docID -> { - try { - List values = new ArrayList<>(); - context.reader().document(docID, new StoredFieldVisitor() { - private Status found = Status.NO; - - @Override - public Status needsField(FieldInfo fieldInfo) { - if (fieldInfo.name.equals(name)) { - found = Status.STOP; - return Status.YES; - } - return found; - } - - @Override - public void stringField(FieldInfo fieldInfo, String value) { - assert fieldInfo.name.equals(name); - values.add(value); + List values = new ArrayList<>(); + context.reader().document(docID, new StoredFieldVisitor() { + private Status found = Status.NO; + + @Override + public Status needsField(FieldInfo fieldInfo) { + if (fieldInfo.name.equals(name)) { + found = Status.STOP; + return Status.YES; } - }); - return values; - } catch (IOException e) { - throw new UncheckedIOException(e); - } + return found; + } + + @Override + public void stringField(FieldInfo fieldInfo, String value) { + assert fieldInfo.name.equals(name); + values.add(value); + } + }); + return values; }; } SourceLookup sourceLookup = searchExecutionContext.lookup().source(); From 0d9fe2d6a53511f941505e31ccefc0163b35f714 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 
23 Aug 2022 11:41:35 -0400 Subject: [PATCH 06/10] More tests for enrich processor Adds more tests for the enrich processor around different index types. Right now they all work fine (yay!) but this feels like a good amount of paranoia. --- .../test/enrich/20_standard_index.yml | 106 +++++++++++++++++ .../test/enrich/30_tsdb_index.yml | 111 ++++++++++++++++++ .../test/enrich/40_synthetic_source.yml | 108 +++++++++++++++++ 3 files changed, 325 insertions(+) create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/20_standard_index.yml create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/30_tsdb_index.yml create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/40_synthetic_source.yml diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/20_standard_index.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/20_standard_index.yml new file mode 100644 index 0000000000000..d19cd1cf56f78 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/20_standard_index.yml @@ -0,0 +1,106 @@ +--- +setup: + - do: + indices.create: + index: source + body: + mappings: + properties: + baz: + type: keyword + a: + type: keyword + b: + type: keyword + + - do: + bulk: + refresh: true + index: source + body: + - '{"index": {}}' + - '{"baz": "quick", "a": "brown", "b": "fox"}' + - '{"index": {}}' + - '{"baz": "lazy", "a": "dog"}' + + - do: + enrich.put_policy: + name: test_policy + body: + match: + indices: ["source*"] + match_field: baz + enrich_fields: ["a", "b"] + + - do: + enrich.execute_policy: + name: test_policy + + - do: + ingest.put_pipeline: + id: test_pipeline + body: + processors: + - enrich: + policy_name: test_policy + field: baz + target_field: target + +--- +teardown: + - do: + ingest.delete_pipeline: + id: test_pipeline + + - do: + enrich.delete_policy: + name: test_policy + +--- +run: + - do: + bulk: + 
refresh: true + index: target + pipeline: test_pipeline + body: + - '{"index": {"_id": "1"}}' + - '{"baz": "quick", "c": 1}' + - '{"index": {"_id": "2"}}' + - '{"baz": "lazy", "c": 2}' + - '{"index": {"_id": "3"}}' + - '{"baz": "slow", "c": 3}' + + - do: + get: + index: target + id: "1" + - match: + _source: + baz: quick + target: + baz: quick + a: brown + b: fox + c: 1 + + - do: + get: + index: target + id: "2" + - match: + _source: + baz: lazy + target: + baz: lazy + a: dog + c: 2 + + - do: + get: + index: target + id: "3" + - match: + _source: + baz: slow + c: 3 diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/30_tsdb_index.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/30_tsdb_index.yml new file mode 100644 index 0000000000000..064575808b79d --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/30_tsdb_index.yml @@ -0,0 +1,111 @@ +--- +setup: + - do: + indices.create: + index: source + body: + settings: + index: + mode: time_series + routing_path: baz + mappings: + properties: + baz: + type: keyword + time_series_dimension: true + a: + type: keyword + b: + type: keyword + + - do: + bulk: + refresh: true + index: source + body: + - '{"index": {}}' + - '{"@timestamp": "2022-01-01T00:00:00Z", "baz": "quick", "a": "brown", "b": "fox"}' + - '{"index": {}}' + - '{"@timestamp": "2022-01-01T00:00:00Z", "baz": "lazy", "a": "dog"}' + + - do: + enrich.put_policy: + name: test_policy + body: + match: + indices: ["source*"] + match_field: baz + enrich_fields: ["a", "b"] + + - do: + enrich.execute_policy: + name: test_policy + + - do: + ingest.put_pipeline: + id: test_pipeline + body: + processors: + - enrich: + policy_name: test_policy + field: baz + target_field: target + +--- +teardown: + - do: + ingest.delete_pipeline: + id: test_pipeline + + - do: + enrich.delete_policy: + name: test_policy + +--- +run: + - do: + bulk: + refresh: true + index: target + pipeline: test_pipeline + 
body: + - '{"index": {"_id": "1"}}' + - '{"baz": "quick", "c": 1}' + - '{"index": {"_id": "2"}}' + - '{"baz": "lazy", "c": 2}' + - '{"index": {"_id": "3"}}' + - '{"baz": "slow", "c": 3}' + + - do: + get: + index: target + id: "1" + - match: + _source: + baz: quick + target: + baz: quick + a: brown + b: fox + c: 1 + + - do: + get: + index: target + id: "2" + - match: + _source: + baz: lazy + target: + baz: lazy + a: dog + c: 2 + + - do: + get: + index: target + id: "3" + - match: + _source: + baz: slow + c: 3 diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/40_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/40_synthetic_source.yml new file mode 100644 index 0000000000000..6a0b011983e09 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/40_synthetic_source.yml @@ -0,0 +1,108 @@ +--- +setup: + - do: + indices.create: + index: source + body: + mappings: + _source: + mode: synthetic + properties: + baz: + type: keyword + a: + type: keyword + b: + type: keyword + + - do: + bulk: + refresh: true + index: source + body: + - '{"index": {}}' + - '{"baz": "quick", "a": "brown", "b": "fox"}' + - '{"index": {}}' + - '{"baz": "lazy", "a": "dog"}' + + - do: + enrich.put_policy: + name: test_policy + body: + match: + indices: ["source*"] + match_field: baz + enrich_fields: ["a", "b"] + + - do: + enrich.execute_policy: + name: test_policy + + - do: + ingest.put_pipeline: + id: test_pipeline + body: + processors: + - enrich: + policy_name: test_policy + field: baz + target_field: target + +--- +teardown: + - do: + ingest.delete_pipeline: + id: test_pipeline + + - do: + enrich.delete_policy: + name: test_policy + +--- +run: + - do: + bulk: + refresh: true + index: target + pipeline: test_pipeline + body: + - '{"index": {"_id": "1"}}' + - '{"baz": "quick", "c": 1}' + - '{"index": {"_id": "2"}}' + - '{"baz": "lazy", "c": 2}' + - '{"index": {"_id": "3"}}' + - '{"baz": "slow", 
"c": 3}' + + - do: + get: + index: target + id: "1" + - match: + _source: + baz: quick + target: + baz: quick + a: brown + b: fox + c: 1 + + - do: + get: + index: target + id: "2" + - match: + _source: + baz: lazy + target: + baz: lazy + a: dog + c: 2 + + - do: + get: + index: target + id: "3" + - match: + _source: + baz: slow + c: 3 From 1b08a25d06e83d1a6c612de67d9fbcc9153faa92 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 23 Aug 2022 13:47:19 -0400 Subject: [PATCH 07/10] Clean --- .../extras/MatchOnlyTextFieldMapper.java | 22 ++++------------- .../extras/MatchOnlyTextFieldMapperTests.java | 24 +++++++++++++------ 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index ec5edabaccae7..a29bba5b50d77 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -12,6 +12,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; @@ -85,19 +86,6 @@ public static class Defaults { } - /** - * The {@link FieldType} used to store "original" so that they can be - * rebuilt for synthetic source. 
- */ - private static final FieldType ORIGINAL_FIELD_TYPE = new FieldType(); - static { - ORIGINAL_FIELD_TYPE.setTokenized(false); - ORIGINAL_FIELD_TYPE.setOmitNorms(true); - ORIGINAL_FIELD_TYPE.setIndexOptions(IndexOptions.NONE); - ORIGINAL_FIELD_TYPE.setStored(true); - ORIGINAL_FIELD_TYPE.freeze(); - } - public static class Builder extends FieldMapper.Builder { private final Version indexCreatedVersion; @@ -189,7 +177,7 @@ private Function, IOException ); } if (searchExecutionContext.isSourceSynthetic()) { - String name = originalFieldName(); + String name = storedFieldNameForSyntheticSource(); return context -> docID -> { List values = new ArrayList<>(); context.reader().document(docID, new StoredFieldVisitor() { @@ -334,7 +322,7 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations"); } - private String originalFieldName() { + private String storedFieldNameForSyntheticSource() { return name() + "._original"; } } @@ -386,7 +374,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.addToFieldNames(fieldType().name()); if (context.isSyntheticSource()) { - context.doc().add(new Field(fieldType().originalFieldName(), value, ORIGINAL_FIELD_TYPE)); + context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value)); } } @@ -407,6 +395,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new StringStoredFieldFieldLoader(fieldType().originalFieldName(), simpleName()); + return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName()); } } diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java 
b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java index 5a2bbc2973030..1179c03a35ab3 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java @@ -60,13 +60,23 @@ protected Object getSampleValueForDocument() { return "value"; } - public final void testExists() throws IOException { - MapperService mapperService = createMapperService(testMapping()); - assertExistsQuery(mapperService); + public void testExistsStandardSource() throws IOException { + assertExistsQuery(createMapperService(testMapping(false))); } - public final void testPhraseQuery() throws IOException { - MapperService mapperService = createMapperService(testMapping()); + public void testExistsSyntheticSource() throws IOException { + assertExistsQuery(createMapperService(testMapping(true))); + } + + public void testPhraseQueryStandardSource() throws IOException { + assertPhraseQuery(createMapperService(testMapping(false))); + } + + public void testPhraseQuerySyntheticSource() throws IOException { + assertPhraseQuery(createMapperService(testMapping(true))); + } + + private void assertPhraseQuery(MapperService mapperService) throws IOException { try (Directory directory = newDirectory()) { RandomIndexWriter iw = new RandomIndexWriter(random(), directory); LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox"))).rootDoc(); @@ -91,8 +101,8 @@ protected void registerParameters(ParameterChecker checker) throws IOException { ); } - private XContentBuilder testMapping() throws IOException { - if (randomBoolean()) { + private XContentBuilder testMapping(boolean syntheticSource) throws IOException { + if (syntheticSource) { return syntheticSourceMapping(b -> b.startObject("field").field("type", 
"match_only_text").endObject()); } return fieldMapping(b -> b.field("type", "match_only_text")); From 8998678b22b67661aada4a733c60c8384258ffee Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 31 Aug 2022 15:32:51 -0400 Subject: [PATCH 08/10] Update --- .../extras/MatchOnlyTextFieldMapper.java | 33 ++++++------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index a29bba5b50d77..f4c165c3ce221 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -13,10 +13,8 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; @@ -37,6 +35,8 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; +import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; +import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperBuilderContext; @@ -61,6 +61,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import 
java.util.function.Function; /** @@ -178,27 +179,13 @@ private Function, IOException } if (searchExecutionContext.isSourceSynthetic()) { String name = storedFieldNameForSyntheticSource(); - return context -> docID -> { - List values = new ArrayList<>(); - context.reader().document(docID, new StoredFieldVisitor() { - private Status found = Status.NO; - - @Override - public Status needsField(FieldInfo fieldInfo) { - if (fieldInfo.name.equals(name)) { - found = Status.STOP; - return Status.YES; - } - return found; - } - - @Override - public void stringField(FieldInfo fieldInfo, String value) { - assert fieldInfo.name.equals(name); - values.add(value); - } - }); - return values; + StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name)); + return context -> { + LeafStoredFieldLoader leafLoader = loader.getLoader(context, null); + return docId -> { + leafLoader.advanceTo(docId); + return leafLoader.storedFields().get(name); + }; }; } SourceLookup sourceLookup = searchExecutionContext.lookup().source(); From 6c27a515dcd5eba8d0416f7f3dd9087038df831c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 1 Sep 2022 11:06:31 -0400 Subject: [PATCH 09/10] Compile after merge --- .../index/mapper/extras/MatchOnlyTextFieldMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index f4c165c3ce221..5f75663857018 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -382,6 +382,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } 
- return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName()); + return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName(), null); } } From 67319d8f9007076499ac1d4f9e37be19de3522b0 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 1 Sep 2022 11:20:07 -0400 Subject: [PATCH 10/10] Fixup: write match_only_text synthetic source values as strings --- .../index/mapper/extras/MatchOnlyTextFieldMapper.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 5f75663857018..26929a62d9766 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -53,6 +53,7 @@ import org.elasticsearch.script.field.TextDocValuesField; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.lookup.SourceLookup; +import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.io.UncheckedIOException; @@ -382,6 +383,11 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName(), null); + return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName(), null) { + @Override + protected void write(XContentBuilder b, Object value) throws IOException { + b.value((String) value); + } + }; } }