Skip to content

Commit

Permalink
Add DerivedFieldMapper and support parsing it in mappings (opensearch…
Browse files Browse the repository at this point in the history
…-project#12569)

Adds a DerivedFieldMapper to support the Derived Fields feature enhancement, and updates the mapper parsing logic to recognize and parse derived fields in the mappings.

---------

Signed-off-by: Mohammad Qureshi <qreshi@amazon.com>
Signed-off-by: Rishabh Maurya <rishabhmaurya05@gmail.com>
Co-authored-by: Rishabh Maurya <rishabhmaurya05@gmail.com>
Signed-off-by: Shivansh Arora <hishiv@amazon.com>
  • Loading branch information
2 people authored and shiv0408 committed Apr 25, 2024
1 parent 3af069e commit 27dec47
Show file tree
Hide file tree
Showing 10 changed files with 686 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Allow setting KEYSTORE_PASSWORD through env variable ([#12865](https://github.com/opensearch-project/OpenSearch/pull/12865))
- [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697))
- [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954))
- Derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569))
- Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044))
- Add cluster primary balance constraint for rebalancing with buffer ([#12656](https://github.com/opensearch-project/OpenSearch/pull/12656))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.index.IndexableField;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.script.Script;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;

/**
 * Field mapper for derived fields. A derived field is configured with a {@code type}
 * (defaulting to {@code "text"}) and an optional {@code script}; this mapper never creates
 * fields from a parsed document (see {@link #parseCreateField(ParseContext)}).
 *
 * @opensearch.internal
 */
public class DerivedFieldMapper extends ParametrizedFieldMapper {

    public static final String CONTENT_TYPE = "derived";

    /** Type parser that creates a fresh {@link Builder} for the given field name. */
    public static final TypeParser PARSER = new TypeParser((fieldName, parserContext) -> new Builder(fieldName));

    /** Casts a generic {@link FieldMapper} to this class so that parameter initializers can read its fields. */
    private static DerivedFieldMapper toType(FieldMapper mapper) {
        return (DerivedFieldMapper) mapper;
    }

    /**
     * Builder for {@link DerivedFieldMapper}, exposing the {@code type} and {@code script} parameters.
     *
     * @opensearch.internal
     */
    public static class Builder extends ParametrizedFieldMapper.Builder {
        // TODO: The type of parameter may change here if the actual underlying FieldType object is needed
        private final Parameter<String> type = Parameter.stringParam("type", false, m -> toType(m).type, "text");

        // Optional script; an absent (null) value is parsed to null and is skipped on serialization.
        private final Parameter<Script> script = new Parameter<Script>("script", false, () -> null, (paramName, parserContext, rawValue) -> {
            if (rawValue == null) {
                return null;
            }
            return Script.parse(rawValue);
        }, m -> toType(m).script).setSerializerCheck((includeDefaults, isConfigured, value) -> value != null);

        public Builder(String name) {
            super(name);
        }

        @Override
        protected List<Parameter<?>> getParameters() {
            return Arrays.asList(type, script);
        }

        /**
         * Builds the mapper: resolves the delegate field mapper and the indexable-field generator for the
         * configured type, wraps them in a {@link DerivedFieldType} and hands everything to the mapper.
         */
        @Override
        public DerivedFieldMapper build(BuilderContext context) {
            final String derivedType = type.getValue();
            final FieldMapper typeMapper = DerivedFieldSupportedTypes.getFieldMapperFromType(derivedType, name, context);
            final Function<Object, IndexableField> indexableFieldGenerator = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(
                derivedType,
                name
            );
            final DerivedFieldType derivedFieldType = new DerivedFieldType(
                buildFullName(context),
                derivedType,
                script.getValue(),
                typeMapper,
                indexableFieldGenerator
            );
            return new DerivedFieldMapper(name, derivedFieldType, multiFieldsBuilder.build(this, context), copyTo.build(), this);
        }
    }

    private final String type;
    private final Script script;

    /**
     * Creates the mapper, capturing the {@code type} and {@code script} values currently held by the builder.
     */
    protected DerivedFieldMapper(
        String simpleName,
        MappedFieldType mappedFieldType,
        MultiFields multiFields,
        CopyTo copyTo,
        Builder builder
    ) {
        super(simpleName, mappedFieldType, multiFields, copyTo);
        this.type = builder.type.getValue();
        this.script = builder.script.getValue();
    }

    @Override
    public DerivedFieldType fieldType() {
        final MappedFieldType mappedFieldType = super.fieldType();
        return (DerivedFieldType) mappedFieldType;
    }

    /**
     * Always throws: derived fields are configured through the {@link Builder} when the mapping is parsed,
     * so this mapper is not expected to be asked to create fields from a document.
     */
    @Override
    protected void parseCreateField(ParseContext context) throws IOException {
        throw new UnsupportedOperationException("should not be invoked");
    }

    @Override
    public ParametrizedFieldMapper.Builder getMergeBuilder() {
        final Builder mergeBuilder = new Builder(simpleName());
        return mergeBuilder.init(this);
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }

    /**
     * Serializes the builder parameters first, then multi-fields, then copy-to.
     */
    @Override
    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
        final ParametrizedFieldMapper.Builder parameterSource = getMergeBuilder();
        parameterSource.toXContent(builder, includeDefaults);
        multiFields.toXContent(builder, params);
        copyTo.toXContent(builder, params);
    }

    /** Returns the {@code type} value captured from the builder. */
    public String getType() {
        return type;
    }

    /** Returns the {@code script} captured from the builder; {@code null} when none was configured. */
    public Script getScript() {
        return script;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,15 @@ protected static boolean parseObjectOrDocumentTypeProperties(
} else if (fieldName.equals("enabled")) {
builder.enabled(XContentMapValues.nodeBooleanValue(fieldNode, fieldName + ".enabled"));
return true;
} else if (fieldName.equals("derived")) {
if (fieldNode instanceof Collection && ((Collection) fieldNode).isEmpty()) {
// nothing to do here, empty (to support "derived: []" case)
} else if (fieldNode instanceof Map) {
parseDerived(builder, (Map<String, Object>) fieldNode, parserContext);
} else {
throw new OpenSearchParseException("derived must be a map type");
}
return true;
} else if (fieldName.equals("properties")) {
if (fieldNode instanceof Collection && ((Collection) fieldNode).isEmpty()) {
// nothing to do here, empty (to support "properties: []" case)
Expand Down Expand Up @@ -349,6 +358,55 @@ protected static void parseNested(
}
}

/**
 * Parses the entries of a {@code derived} mapping section and adds one mapper builder per entry to
 * {@code objBuilder}.
 * <p>
 * Every entry is removed from {@code derivedNode} once handled. An entry whose value is a map is parsed
 * with the {@link DerivedFieldMapper} type parser; an entry whose value is an empty list is dropped.
 * A dotted field name is split on {@code '.'}: the last part names the derived field and each preceding
 * part becomes an enclosing {@link ObjectMapper.Builder}.
 *
 * @param objBuilder    builder that receives the parsed derived field builders
 * @param derivedNode   the content of the {@code derived} section, keyed by field name; mutated by this method
 * @param parserContext context used to look up the type parser and the index creation version
 * @throws MapperParsingException if a field name yields no name parts after splitting, or if an entry value
 *                                is neither a map nor an empty list
 */
protected static void parseDerived(ObjectMapper.Builder objBuilder, Map<String, Object> derivedNode, ParserContext parserContext) {
    Iterator<Map.Entry<String, Object>> iterator = derivedNode.entrySet().iterator();
    while (iterator.hasNext()) {
        Map.Entry<String, Object> entry = iterator.next();
        String fieldName = entry.getKey();
        Object fieldValue = entry.getValue();
        // Should accept empty arrays, as a work around for when the
        // user can't provide an empty Map. (PHP for example)
        boolean isEmptyList = fieldValue instanceof List && ((List<?>) fieldValue).isEmpty();

        if (fieldValue instanceof Map) {
            @SuppressWarnings("unchecked")
            Map<String, Object> node = (Map<String, Object>) fieldValue;

            // Derived fields are a bit unique in that the 'type' attribute does not map to the TypeParser
            // like it would for traditional fields in properties.
            // So in this case, the DerivedFieldMapper's TypeParser will explicitly be used
            Mapper.TypeParser typeParser = parserContext.typeParser(DerivedFieldMapper.CONTENT_TYPE);
            String[] fieldNameParts = fieldName.split("\\.");
            // split() drops trailing empty strings, so a name made only of dots (e.g. ".") yields no parts
            if (fieldNameParts.length < 1) {
                throw new MapperParsingException("Invalid field name " + fieldName);
            }
            String realFieldName = fieldNameParts[fieldNameParts.length - 1];
            Mapper.Builder<?> fieldBuilder = typeParser.parse(realFieldName, node, parserContext);
            // Wrap the leaf builder in object builders, innermost path part first
            for (int i = fieldNameParts.length - 2; i >= 0; --i) {
                ObjectMapper.Builder<?> intermediate = new ObjectMapper.Builder<>(fieldNameParts[i]);
                intermediate.add(fieldBuilder);
                fieldBuilder = intermediate;
            }
            objBuilder.add(fieldBuilder);
            node.remove("type");
            // presumably rejects any keys the type parser left behind in the node -- confirm in DocumentMapperParser
            DocumentMapperParser.checkNoRemainingFields(fieldName, node, parserContext.indexVersionCreated());
            iterator.remove();
        } else if (isEmptyList) {
            iterator.remove();
        } else {
            // Report the class of the offending value (the field name is always a String); the value may be null
            Object actualType = fieldValue == null ? "null" : fieldValue.getClass();
            throw new MapperParsingException(
                "Expected map for property [derived_fields] on field [" + fieldName + "] but got a " + actualType
            );
        }
    }

    DocumentMapperParser.checkNoRemainingFields(
        derivedNode,
        parserContext.indexVersionCreated(),
        "DocType mapping definition has unsupported parameters: "
    );
}

protected static void parseProperties(ObjectMapper.Builder objBuilder, Map<String, Object> propsNode, ParserContext parserContext) {
Iterator<Map.Entry<String, Object>> iterator = propsNode.entrySet().iterator();
while (iterator.hasNext()) {
Expand Down Expand Up @@ -663,7 +721,21 @@ public void toXContent(XContentBuilder builder, Params params, ToXContent custom
doXContent(builder, params);

// sort the mappers so we get consistent serialization format
Mapper[] sortedMappers = mappers.values().stream().toArray(size -> new Mapper[size]);
Mapper[] derivedSortedMappers = mappers.values()
.stream()
.filter(m -> m instanceof DerivedFieldMapper)
.toArray(size -> new Mapper[size]);
Arrays.sort(derivedSortedMappers, new Comparator<Mapper>() {
@Override
public int compare(Mapper o1, Mapper o2) {
return o1.name().compareTo(o2.name());
}
});

Mapper[] sortedMappers = mappers.values()
.stream()
.filter(m -> !(m instanceof DerivedFieldMapper))
.toArray(size -> new Mapper[size]);
Arrays.sort(sortedMappers, new Comparator<Mapper>() {
@Override
public int compare(Mapper o1, Mapper o2) {
Expand All @@ -672,6 +744,17 @@ public int compare(Mapper o1, Mapper o2) {
});

int count = 0;
for (Mapper mapper : derivedSortedMappers) {
if (count++ == 0) {
builder.startObject("derived");
}
mapper.toXContent(builder, params);
}
if (count > 0) {
builder.endObject();
}

count = 0;
for (Mapper mapper : sortedMappers) {
if (!(mapper instanceof MetadataFieldMapper)) {
if (count++ == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,11 @@ public final void parse(String name, ParserContext parserContext, Map<String, Ob
deprecatedParamsMap.put(deprecatedName, param);
}
}
String type = (String) fieldNode.remove("type");
String type = (String) fieldNode.get("type");
if (paramsMap.get("type") == null) {
fieldNode.remove("type");
}

for (Iterator<Map.Entry<String, Object>> iterator = fieldNode.entrySet().iterator(); iterator.hasNext();) {
Map.Entry<String, Object> entry = iterator.next();
final String propName = entry.getKey();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.index.mapper.ConstantKeywordFieldMapper;
import org.opensearch.index.mapper.DataStreamFieldMapper;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.DerivedFieldMapper;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.FieldAliasMapper;
import org.opensearch.index.mapper.FieldNamesFieldMapper;
Expand Down Expand Up @@ -170,6 +171,7 @@ public static Map<String, Mapper.TypeParser> getMappers(List<MapperPlugin> mappe
mappers.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser());
mappers.put(FlatObjectFieldMapper.CONTENT_TYPE, FlatObjectFieldMapper.PARSER);
mappers.put(ConstantKeywordFieldMapper.CONTENT_TYPE, new ConstantKeywordFieldMapper.TypeParser());
mappers.put(DerivedFieldMapper.CONTENT_TYPE, DerivedFieldMapper.PARSER);

for (MapperPlugin mapperPlugin : mapperPlugins) {
for (Map.Entry<String, Mapper.TypeParser> entry : mapperPlugin.getMappers().entrySet()) {
Expand Down
Loading

0 comments on commit 27dec47

Please sign in to comment.