From 9316f6d73bdd5f3c99d98b755074963dccd987b5 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Thu, 4 Jan 2024 21:13:28 +0000 Subject: [PATCH] Fix parsing of flat object fields with dots in keys (#11425) We have a bug where a flat object field with inner fields that contain dots will "push" the dotted name onto the dot-path from the root, but then would just "pop" off the last part of the dotted name. This change adds more robust support for flat object keys and subkeys that contain dots (i.e. it pops off the entirety of the latest key, regardless of how many dots it contains). Fixes https://github.com/opensearch-project/OpenSearch/issues/11402 Signed-off-by: Michael Froh (cherry picked from commit 22b628bacaab56c145538898ca190ffce9778c6e) --- CHANGELOG.md | 1 + .../xcontent/JsonToStringXContentParser.java | 31 +++-- .../index/mapper/FlatObjectFieldMapper.java | 2 +- .../JsonToStringXContentParserTests.java | 113 ++++++++++++++++++ 4 files changed, 136 insertions(+), 11 deletions(-) create mode 100644 server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index c942e593d9566..998934bb54535 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -120,6 +120,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix the issue with DefaultSpanScope restoring wrong span in the TracerContextStorage upon detach ([#11316](https://github.com/opensearch-project/OpenSearch/issues/11316)) - Remove shadowJar from `lang-painless` module publication ([#11369](https://github.com/opensearch-project/OpenSearch/issues/11369)) - Fix remote shards balancer and remove unused variables ([#11167](https://github.com/opensearch-project/OpenSearch/pull/11167)) +- Fix parsing of flat object fields with dots in keys ([#11425](https://github.com/opensearch-project/OpenSearch/pull/11425)) - Fix bug where replication lag grows post primary relocation ([#11238](https://github.com/opensearch-project/OpenSearch/pull/11238)) - Fix for stuck update action in a bulk with `retry_on_conflict` property ([#11152](https://github.com/opensearch-project/OpenSearch/issues/11152)) - Fix template setting override for replication type ([#11417](https://github.com/opensearch-project/OpenSearch/pull/11417)) diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 9e81d8a7af078..9b2bd06a88e2e 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -18,7 +18,6 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentLocation; import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.index.mapper.ParseContext; import java.io.IOException; import java.math.BigInteger; @@ -40,7 +39,6 @@ public class JsonToStringXContentParser extends AbstractXContentParser { private ArrayList keyList = new ArrayList<>(); private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent); - private ParseContext parseContext; private NamedXContentRegistry xContentRegistry; @@ -54,14 +52,13 @@ public class JsonToStringXContentParser extends AbstractXContentParser { public JsonToStringXContentParser( NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, - ParseContext parseContext, + XContentParser parser, String fieldTypeName ) throws IOException { super(xContentRegistry, deprecationHandler); - this.parseContext = parseContext; this.deprecationHandler = deprecationHandler; this.xContentRegistry = xContentRegistry; - this.parser = parseContext.parser(); + this.parser = parser; this.fieldTypeName = fieldTypeName; } @@ -86,8 +83,22 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx StringBuilder parsedFields = new StringBuilder(); if (this.parser.currentToken() == Token.FIELD_NAME) { - path.append(DOT_SYMBOL + currentFieldName); - this.keyList.add(currentFieldName); + path.append(DOT_SYMBOL).append(currentFieldName); + int dotIndex = currentFieldName.indexOf(DOT_SYMBOL); + String fieldNameSuffix = currentFieldName; + // The field name may be of the form foo.bar.baz + // If that's the case, each "part" is a key. + while (dotIndex >= 0) { + String fieldNamePrefix = fieldNameSuffix.substring(0, dotIndex); + if (!fieldNamePrefix.isEmpty()) { + this.keyList.add(fieldNamePrefix); + } + fieldNameSuffix = fieldNameSuffix.substring(dotIndex + 1); + dotIndex = fieldNameSuffix.indexOf(DOT_SYMBOL); + } + if (!fieldNameSuffix.isEmpty()) { + this.keyList.add(fieldNameSuffix); + } } else if (this.parser.currentToken() == Token.START_ARRAY) { parseToken(path, currentFieldName); break; @@ -97,18 +108,18 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx parseToken(path, currentFieldName); int dotIndex = path.lastIndexOf(DOT_SYMBOL); if (dotIndex != -1) { - path.delete(dotIndex, path.length()); + path.setLength(path.length() - currentFieldName.length() - 1); } } else { if (!path.toString().contains(currentFieldName)) { - path.append(DOT_SYMBOL + currentFieldName); + path.append(DOT_SYMBOL).append(currentFieldName); } parseValue(parsedFields); this.valueList.add(parsedFields.toString()); this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); int dotIndex = path.lastIndexOf(DOT_SYMBOL); if (dotIndex != -1) { - path.delete(dotIndex, path.length()); + path.setLength(path.length() - currentFieldName.length() - 1); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 00b623dddac23..9a3f2595a7c9e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -572,7 +572,7 @@ protected void parseCreateField(ParseContext context) throws IOException { JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser( NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, - context, + context.parser(), fieldType().name() ); /* diff --git a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java new file mode 100644 index 0000000000000..0feb7bcd1ceec --- /dev/null +++ b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.xcontent; + +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class JsonToStringXContentParserTests extends OpenSearchTestCase { + + private String flattenJsonString(String fieldName, String in) throws IOException { + String transformed; + try ( + XContentParser parser = JsonXContent.jsonXContent.createParser( + xContentRegistry(), + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + in + ) + ) { + JsonToStringXContentParser jsonToStringXContentParser = new JsonToStringXContentParser( + xContentRegistry(), + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + parser, + fieldName + ); + // Skip the START_OBJECT token: + jsonToStringXContentParser.nextToken(); + + XContentParser transformedParser = jsonToStringXContentParser.parseObject(); + try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) { + jsonBuilder.copyCurrentStructure(transformedParser); + return jsonBuilder.toString(); + } + } + } + + public void testNestedObjects() throws IOException { + String jsonExample = "{" + "\"first\" : \"1\"," + "\"second\" : {" + " \"inner\": \"2.0\"" + "}," + "\"third\": \"three\"" + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + + public void testChildHasDots() throws IOException { + // This should be exactly the same as testNestedObjects. We're just using the "flat" notation for the inner + // object. + String jsonExample = "{" + "\"first\" : \"1\"," + "\"second.inner\" : \"2.0\"," + "\"third\": \"three\"" + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + + public void testNestChildObjectWithDots() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"really_inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"really_inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + + public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"totally.absolutely.inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + +}