Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synthetic _source: support field in many cases #89950

Merged
merged 17 commits into from
Nov 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/89950.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 89950
summary: "Synthetic _source: support `field` in many cases"
area: TSDB
type: enhancement
issues: []

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.DocumentParserContext;
Expand Down Expand Up @@ -121,7 +122,13 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(context.buildFullName(name), tsi, indexAnalyzer, meta.getValue());
MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(
context.buildFullName(name),
tsi,
indexAnalyzer,
context.isSourceSynthetic(),
meta.getValue()
);
return ft;
}

Expand All @@ -148,17 +155,24 @@ public static class MatchOnlyTextFieldType extends StringFieldType {
private final Analyzer indexAnalyzer;
private final TextFieldType textFieldType;

public MatchOnlyTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, Map<String, String> meta) {
public MatchOnlyTextFieldType(
String name,
TextSearchInfo tsi,
Analyzer indexAnalyzer,
boolean isSyntheticSource,
Map<String, String> meta
) {
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
this.textFieldType = new TextFieldType(name);
this.textFieldType = new TextFieldType(name, isSyntheticSource);
}

/**
 * Convenience constructor that delegates to the full constructor with fixed defaults:
 * {@link Lucene#STANDARD_ANALYZER} for the search, search-quote and index analyzers,
 * synthetic source disabled ({@code false}) and an empty metadata map.
 *
 * @param name the field name handed to the full constructor
 */
public MatchOnlyTextFieldType(String name) {
this(
name,
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
Lucene.STANDARD_ANALYZER,
// isSyntheticSource
false,
Collections.emptyMap()
);
}
Expand Down Expand Up @@ -305,17 +319,28 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,

@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
if (fieldDataContext.fielddataOperation() == FielddataOperation.SCRIPT) {
return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
name(),
if (fieldDataContext.fielddataOperation() != FielddataOperation.SCRIPT) {
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
}
if (textFieldType.isSyntheticSource()) {
return (cache, breaker) -> new StoredFieldSortedBinaryIndexFieldData(
storedFieldNameForSyntheticSource(),
CoreValuesSourceType.KEYWORD,
SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
fieldDataContext.lookupSupplier().get().source(),
TextDocValuesField::new
);
) {
@Override
protected BytesRef storedToBytesRef(Object stored) {
return new BytesRef((String) stored);
}
};
}

throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
name(),
CoreValuesSourceType.KEYWORD,
SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
fieldDataContext.lookupSupplier().get().source(),
TextDocValuesField::new
);
}

private String storedFieldNameForSyntheticSource() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,16 @@ public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
}
}

/**
 * Runs {@code assertScriptDocValues} against a {@code match_only_text} field built with
 * {@code fieldMapping}, expecting the matcher {@code equalTo(List.of("foo"))} to hold for "foo".
 */
public void testDocValues() throws IOException {
    assertScriptDocValues(
        createMapperService(fieldMapping(mapping -> mapping.field("type", "match_only_text"))),
        "foo",
        equalTo(List.of("foo"))
    );
}

/**
 * Same check as the plain doc-values test, but the {@code match_only_text} field is built with
 * {@code syntheticSourceFieldMapping} (presumably enabling synthetic {@code _source} - confirm in the base test class).
 */
public void testDocValuesLoadedFromSynthetic() throws IOException {
    assertScriptDocValues(
        createMapperService(syntheticSourceFieldMapping(mapping -> mapping.field("type", "match_only_text"))),
        "foo",
        equalTo(List.of("foo"))
    );
}

@Override
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,13 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder
wrapAnalyzer(analyzers.getSearchAnalyzer()),
wrapAnalyzer(analyzers.getSearchQuoteAnalyzer())
);
return new AnnotatedTextFieldType(context.buildFullName(name), store.getValue(), tsi, meta.getValue());
return new AnnotatedTextFieldType(
context.buildFullName(name),
store.getValue(),
tsi,
context.isSourceSynthetic(),
meta.getValue()
);
}

@Override
Expand Down Expand Up @@ -467,8 +473,14 @@ private void emitAnnotation(int firstSpannedTextPosInc, int annotationPosLen) th

public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {

private AnnotatedTextFieldType(String name, boolean store, TextSearchInfo tsi, Map<String, String> meta) {
super(name, true, store, tsi, meta);
private AnnotatedTextFieldType(
String name,
boolean store,
TextSearchInfo tsi,
boolean isSyntheticSource,
Map<String, String> meta
) {
super(name, true, store, tsi, isSyntheticSource, meta);
}

public AnnotatedTextFieldType(String name, Map<String, String> meta) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SortField;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
import org.elasticsearch.script.field.ToScriptFieldFactory;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;

import java.util.Set;

/**
 * Base {@link IndexFieldData} that serves values out of a single stored field.
 * <p>
 * A {@link StoredFieldLoader} restricted to the field is created once in the constructor. Every call to
 * {@link #load(LeafReaderContext)} or {@link #loadDirect(LeafReaderContext)} derives a per-leaf loader from it
 * and wraps that loader in a {@link StoredFieldLeafFieldData}. Subclasses choose, through
 * {@link #loadLeaf(LeafStoredFieldLoader)}, the value representation handed to the {@link ToScriptFieldFactory}.
 * <p>
 * Sorting, bucketed sorting and {@code getBytesValues()} are not supported and throw
 * {@link IllegalArgumentException}.
 *
 * @param <T> per-leaf value representation passed to the script field factory
 */
public abstract class StoredFieldIndexFieldData<T> implements IndexFieldData<StoredFieldIndexFieldData<T>.StoredFieldLeafFieldData> {
    /** Single message shared by every unsupported operation so that callers see a consistent error. */
    private static final String UNSUPPORTED_MESSAGE = "not supported for stored field fallback";

    private final String fieldName;
    private final ValuesSourceType valuesSourceType;
    protected final ToScriptFieldFactory<T> toScriptFieldFactory;
    protected final StoredFieldLoader loader;

    /**
     * @param fieldName            name of the stored field to read; also reported by {@link #getFieldName()}
     * @param valuesSourceType     value reported by {@link #getValuesSourceType()}
     * @param toScriptFieldFactory factory used to turn the per-leaf values into a script field factory
     */
    protected StoredFieldIndexFieldData(String fieldName, ValuesSourceType valuesSourceType, ToScriptFieldFactory<T> toScriptFieldFactory) {
        this.fieldName = fieldName;
        this.valuesSourceType = valuesSourceType;
        this.toScriptFieldFactory = toScriptFieldFactory;
        // Only this one stored field is requested.
        // NOTE(review): the leading `false` presumably means "do not load _source" - confirm against StoredFieldLoader.create.
        this.loader = StoredFieldLoader.create(false, Set.of(fieldName));
    }

    @Override
    public String getFieldName() {
        return fieldName;
    }

    @Override
    public ValuesSourceType getValuesSourceType() {
        return valuesSourceType;
    }

    /** Identical to {@link #loadDirect(LeafReaderContext)}. */
    @Override
    public final StoredFieldLeafFieldData load(LeafReaderContext context) {
        return loadDirect(context);
    }

    /** Creates a new leaf view backed by a leaf loader obtained from {@link #loader} for the given context. */
    @Override
    public final StoredFieldLeafFieldData loadDirect(LeafReaderContext context) {
        return new StoredFieldLeafFieldData(loader.getLoader(context, null));
    }

    /**
     * Builds the per-leaf value representation on top of the given leaf stored field loader.
     *
     * @param leafStoredFieldLoader loader for the leaf being read
     * @return the values object that is passed to {@link #toScriptFieldFactory}
     */
    protected abstract T loadLeaf(LeafStoredFieldLoader leafStoredFieldLoader);

    /**
     * Always fails: sorting is not available for this implementation.
     *
     * @throws IllegalArgumentException always
     */
    @Override
    public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
        throw new IllegalArgumentException(UNSUPPORTED_MESSAGE);
    }

    /**
     * Always fails: bucketed sorting is not available for this implementation.
     *
     * @throws IllegalArgumentException always
     */
    @Override
    public BucketedSort newBucketedSort(
        BigArrays bigArrays,
        Object missingValue,
        MultiValueMode sortMode,
        XFieldComparatorSource.Nested nested,
        SortOrder sortOrder,
        DocValueFormat format,
        int bucketSize,
        BucketedSort.ExtraData extra
    ) {
        throw new IllegalArgumentException(UNSUPPORTED_MESSAGE);
    }

    /**
     * Per-leaf view. It is an inner (non-static) class because it reads the enclosing instance's
     * {@code toScriptFieldFactory} and {@code fieldName} and calls its {@code loadLeaf}.
     */
    public class StoredFieldLeafFieldData implements LeafFieldData {
        private final LeafStoredFieldLoader loader;

        protected StoredFieldLeafFieldData(LeafStoredFieldLoader loader) {
            this.loader = loader;
        }

        @Override
        public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
            // NOTE(review): the `name` argument is ignored and the stored field name is passed instead.
            // Confirm this is intended when the stored field name differs from the mapped field name.
            return toScriptFieldFactory.getScriptFieldFactory(loadLeaf(loader), fieldName);
        }

        /** Always reports {@code 0}. */
        @Override
        public long ramBytesUsed() {
            return 0;
        }

        /** No-op. */
        @Override
        public void close() {}

        /**
         * Always fails: raw bytes values are not exposed by this leaf view.
         *
         * @throws IllegalArgumentException always
         */
        @Override
        public SortedBinaryDocValues getBytesValues() {
            // Previously said "source fallback"; this class is the stored field fallback.
            throw new IllegalArgumentException(UNSUPPORTED_MESSAGE);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.fielddata;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.script.field.ToScriptFieldFactory;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * {@link StoredFieldIndexFieldData} that exposes the values of a stored field as {@link SortedBinaryDocValues}.
 * <p>
 * For each document the stored values of the field are converted with {@link #storedToBytesRef(Object)} and
 * sorted in {@link BytesRef}'s natural order before being handed out.
 */
public abstract class StoredFieldSortedBinaryIndexFieldData extends StoredFieldIndexFieldData<SortedBinaryDocValues> {

    /**
     * @param fieldName            name of the stored field to read
     * @param valuesSourceType     values source type reported for this field data
     * @param toScriptFieldFactory factory turning the per-leaf {@link SortedBinaryDocValues} into a script field factory
     */
    protected StoredFieldSortedBinaryIndexFieldData(
        String fieldName,
        ValuesSourceType valuesSourceType,
        ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory
    ) {
        super(fieldName, valuesSourceType, toScriptFieldFactory);
    }

    @Override
    protected SourceValueFetcherSortedBinaryDocValues loadLeaf(LeafStoredFieldLoader leafStoredFieldLoader) {
        return new SourceValueFetcherSortedBinaryDocValues(leafStoredFieldLoader);
    }

    /**
     * Converts one stored value of the field into the {@link BytesRef} exposed to consumers.
     *
     * @param stored a single element of the list returned by the stored field loader for this field
     * @return the binary representation of {@code stored}
     */
    protected abstract BytesRef storedToBytesRef(Object stored);

    /**
     * Doc values view over one leaf. Despite the name, values come from the {@link LeafStoredFieldLoader}
     * passed to the constructor. Instances keep mutable per-document state, refreshed by
     * {@link #advanceExact(int)}.
     */
    class SourceValueFetcherSortedBinaryDocValues extends SortedBinaryDocValues {
        private final LeafStoredFieldLoader loader;
        /** Values of the current document, sorted; reused across documents. */
        private final List<BytesRef> sorted = new ArrayList<>();

        /** Index into {@link #sorted} of the value the next {@link #nextValue()} call returns. */
        private int current;
        private int docValueCount;

        SourceValueFetcherSortedBinaryDocValues(LeafStoredFieldLoader loader) {
            this.loader = loader;
        }

        /**
         * Positions this instance on {@code doc} and loads, converts and sorts its stored values.
         *
         * @return {@code true} if the document has at least one stored value for the field
         */
        @Override
        public boolean advanceExact(int doc) throws IOException {
            // Reset all per-document state first. Otherwise the previous document's values stay in
            // `sorted` when this document has none, and a failure while converting values would
            // leave a count that no longer matches the list.
            sorted.clear();
            current = 0;
            docValueCount = 0;

            loader.advanceTo(doc);
            List<Object> values = loader.storedFields().get(getFieldName());
            if (values == null || values.isEmpty()) {
                return false;
            }
            for (Object value : values) {
                sorted.add(storedToBytesRef(value));
            }
            Collections.sort(sorted);
            docValueCount = sorted.size();
            return true;
        }

        @Override
        public int docValueCount() {
            return docValueCount;
        }

        /** Returns the next value of the current document, in sorted order. */
        @Override
        public BytesRef nextValue() throws IOException {
            assert current < docValueCount;
            return sorted.get(current++);
        }
    }
}
Loading