Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve case for RowType's field name and JSON content when CAST #21869

Merged
merged 1 commit into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.common;

public class InvalidTypeDefinitionException
extends RuntimeException
{
public InvalidTypeDefinitionException(String message)
{
this(message, null);
}

public InvalidTypeDefinitionException(Throwable throwable)
{
this(null, throwable);
}

public InvalidTypeDefinitionException(String message, Throwable cause)
{
super(message, cause);
}

@Override
public String getMessage()
{
return super.getMessage();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class SqlFunctionProperties
private final Locale sessionLocale;
private final String sessionUser;
private final boolean fieldNamesInJsonCastEnabled;
private final boolean legacyJsonCast;
private final Map<String, String> extraCredentials;

private SqlFunctionProperties(
Expand All @@ -46,6 +47,7 @@ private SqlFunctionProperties(
Locale sessionLocale,
String sessionUser,
boolean fieldNamesInJsonCastEnabled,
boolean legacyJsonCast,
Map<String, String> extraCredentials)
{
this.parseDecimalLiteralAsDouble = parseDecimalLiteralAsDouble;
Expand All @@ -57,6 +59,7 @@ private SqlFunctionProperties(
this.sessionLocale = requireNonNull(sessionLocale, "sessionLocale is null");
this.sessionUser = requireNonNull(sessionUser, "sessionUser is null");
this.fieldNamesInJsonCastEnabled = fieldNamesInJsonCastEnabled;
this.legacyJsonCast = legacyJsonCast;
this.extraCredentials = requireNonNull(extraCredentials, "extraCredentials is null");
}

Expand Down Expand Up @@ -111,6 +114,11 @@ public boolean isFieldNamesInJsonCastEnabled()
return fieldNamesInJsonCastEnabled;
}

public boolean isLegacyJsonCast()
{
return legacyJsonCast;
}

@Override
public boolean equals(Object o)
{
Expand All @@ -129,13 +137,16 @@ public boolean equals(Object o)
Objects.equals(sessionStartTime, that.sessionStartTime) &&
Objects.equals(sessionLocale, that.sessionLocale) &&
Objects.equals(sessionUser, that.sessionUser) &&
Objects.equals(extraCredentials, that.extraCredentials);
Objects.equals(extraCredentials, that.extraCredentials) &&
Objects.equals(legacyJsonCast, that.legacyJsonCast);
}

@Override
public int hashCode()
{
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey, legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser, extraCredentials);
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey,
legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser,
extraCredentials, legacyJsonCast);
}

public static Builder builder()
Expand All @@ -154,6 +165,7 @@ public static class Builder
private Locale sessionLocale;
private String sessionUser;
private boolean fieldNamesInJsonCastEnabled;
private boolean legacyJsonCast;
private Map<String, String> extraCredentials = emptyMap();

private Builder() {}
Expand Down Expand Up @@ -218,9 +230,26 @@ public Builder setFieldNamesInJsonCastEnabled(boolean fieldNamesInJsonCastEnable
return this;
}

public Builder setLegacyJsonCast(boolean legacyJsonCast)
{
this.legacyJsonCast = legacyJsonCast;
return this;
}

public SqlFunctionProperties build()
{
return new SqlFunctionProperties(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey, legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser, fieldNamesInJsonCastEnabled, extraCredentials);
return new SqlFunctionProperties(
parseDecimalLiteralAsDouble,
legacyRowFieldOrdinalAccessEnabled,
timeZoneKey,
legacyTimestamp,
legacyMapSubscript,
sessionStartTime,
sessionLocale,
sessionUser,
fieldNamesInJsonCastEnabled,
legacyJsonCast,
extraCredentials);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public static Field field(Type type)
return new Field(Optional.empty(), type);
}

public static Field field(String name, Type type, boolean delimited)
{
return new Field(Optional.of(name), type, delimited);
}

private static TypeSignature makeSignature(List<Field> fields)
{
int size = fields.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.facebook.drift.annotations.ThriftConstructor;
import com.facebook.drift.annotations.ThriftField;
import com.facebook.drift.annotations.ThriftStruct;
import com.facebook.presto.common.InvalidTypeDefinitionException;
import com.facebook.presto.common.QualifiedObjectName;
import com.facebook.presto.common.type.BigintEnumType.LongEnumMap;
import com.facebook.presto.common.type.VarcharEnumType.VarcharEnumMap;
Expand Down Expand Up @@ -510,6 +511,7 @@ private static TypeSignature parseRowTypeSignature(String signature, Set<String>

List<TypeSignatureParameter> fields = new ArrayList<>();

Set<String> distinctFieldNames = new HashSet<>();
for (int i = StandardTypes.ROW.length() + 1; i < signature.length(); i++) {
char c = signature.charAt(i);
switch (state) {
Expand Down Expand Up @@ -556,13 +558,19 @@ else if (c == ')' && bracketLevel > 1) {
}
else if (c == ')') {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
fields.add(parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters));
TypeSignatureParameter parameter = parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters);
parameter.getNamedTypeSignature().getName()
.ifPresent(fieldName -> checkDuplicateAndAdd(distinctFieldNames, fieldName));
fields.add(parameter);
tokenStart = -1;
state = RowTypeSignatureParsingState.FINISHED;
}
else if (c == ',' && bracketLevel == 1) {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
fields.add(parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters));
TypeSignatureParameter parameter = parseTypeOrNamedType(signature.substring(tokenStart, i).trim(), literalParameters);
parameter.getNamedTypeSignature().getName()
.ifPresent(fieldName -> checkDuplicateAndAdd(distinctFieldNames, fieldName));
fields.add(parameter);
tokenStart = -1;
state = RowTypeSignatureParsingState.START_OF_FIELD;
}
Expand All @@ -578,6 +586,7 @@ else if (c == ')' && bracketLevel > 1) {
else if (c == ')') {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
verify(delimitedColumnName != null, "Expect delimitedColumnName to be non-null");
checkDuplicateAndAdd(distinctFieldNames, delimitedColumnName);
fields.add(TypeSignatureParameter.of(new NamedTypeSignature(
Optional.of(new RowFieldName(delimitedColumnName, true)),
parseTypeSignature(signature.substring(tokenStart, i).trim(), literalParameters))));
Expand All @@ -588,6 +597,7 @@ else if (c == ')') {
else if (c == ',' && bracketLevel == 1) {
verify(tokenStart >= 0, "Expect tokenStart to be non-negative");
verify(delimitedColumnName != null, "Expect delimitedColumnName to be non-null");
checkDuplicateAndAdd(distinctFieldNames, delimitedColumnName);
fields.add(TypeSignatureParameter.of(new NamedTypeSignature(
Optional.of(new RowFieldName(delimitedColumnName, true)),
parseTypeSignature(signature.substring(tokenStart, i).trim(), literalParameters))));
Expand All @@ -609,6 +619,13 @@ else if (c == ',' && bracketLevel == 1) {
return new TypeSignature(signature.substring(0, StandardTypes.ROW.length()), fields);
}

private static void checkDuplicateAndAdd(Set<String> fieldNames, String fieldName)
{
if (!fieldNames.add(fieldName)) {
throw new InvalidTypeDefinitionException("Duplicate field: " + fieldName);
}
}

private static TypeSignatureParameter parseTypeOrNamedType(String typeOrNamedType, Set<String> literalParameters)
{
int split = typeOrNamedType.indexOf(' ');
Expand Down
13 changes: 13 additions & 0 deletions presto-docs/src/main/sphinx/admin/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -989,3 +989,16 @@ Logging Properties
* **Default value:** ``100MB``

The maximum file size for the log file of the HTTP server.


Legacy Compatible Properties
------------------------------

``legacy_json_cast``
^^^^^^^^^^^^^^^^^^^^^

* **Type:** ``boolean``
* **Default value:** ``true``

When casting from ``JSON`` to ``ROW``, ignore the case of field names in ``RowType`` for legacy support so that the matching is case-insensitive.
Set ``legacy_json_cast`` to ``false`` to strictly enforce the case-sensitivity of double quoted field names in ``RowType`` when matching. Matching for unquoted field names remains case-insensitive.
24 changes: 24 additions & 0 deletions presto-docs/src/main/sphinx/functions/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,30 @@ Cast from JSON
SELECT CAST(JSON '{"k1": [1, 23], "k2": 456}' AS MAP(VARCHAR, JSON)); -- {k1 = JSON '[1,23]', k2 = JSON '456'}
SELECT CAST(JSON '[null]' AS ARRAY(JSON)); -- [JSON 'null']

.. note::

When casting from ``JSON`` to ``ROW``, for legacy support the case of double quoted field names
in ``RowType`` is ignored when matching. For example::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, "V3" BOOLEAN)); -- {v1=123, V2=abc, V3=true}

The following statement returns an error due to duplicate field::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v2":"abc2","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, v2 VARCHAR, "V3" BOOLEAN));

To enforce the case of field names in ``RowType`` when casting from ``JSON`` to ``ROW``, set the configuration property ``legacy_json_cast`` to ``false``
in the coordinator and the worker's `configuration properties <../admin/properties.html#legacy-compatible-properties>`_.
After setting the property, the matching is case-sensitive for double quoted field names and
remains case-insensitive for unquoted field names. For example::

SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "V2" VARCHAR, v3 BOOLEAN)); -- {v1=123, V2=abc, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc","v3":true}' AS ROW(v1 BIGINT, "v2" VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=null, v3=true}
SELECT CAST(JSON '{"v1":123,"V2":"abc", "v2":"abc2","v3":true}' AS ROW(v1 BIGINT, v2 VARCHAR, "V2" VARCHAR, v3 BOOLEAN)); -- {v1=123, v2=abc2, V2=abc, v3=true}

If the name of a field does not match (including case sensitivity), the value is ``null``.

.. note:: When casting from ``JSON`` to ``ROW``, both JSON array and JSON object are supported.

JSON Functions
Expand Down
3 changes: 3 additions & 0 deletions presto-main/src/main/java/com/facebook/presto/Session.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import java.util.TimeZone;
import java.util.stream.Collectors;

import static com.facebook.presto.SystemSessionProperties.LEGACY_JSON_CAST;
import static com.facebook.presto.SystemSessionProperties.isFieldNameInJsonCastEnabled;
import static com.facebook.presto.SystemSessionProperties.isLegacyMapSubscript;
import static com.facebook.presto.SystemSessionProperties.isLegacyRowFieldOrdinalAccessEnabled;
Expand Down Expand Up @@ -496,6 +497,7 @@ public ConnectorSession toConnectorSession()

public SqlFunctionProperties getSqlFunctionProperties()
{
boolean legacyJsonCast = this.sessionPropertyManager.decodeSystemPropertyValue(LEGACY_JSON_CAST, null, Boolean.class);
return SqlFunctionProperties.builder()
.setTimeZoneKey(timeZoneKey)
.setLegacyRowFieldOrdinalAccessEnabled(isLegacyRowFieldOrdinalAccessEnabled(this))
Expand All @@ -506,6 +508,7 @@ public SqlFunctionProperties getSqlFunctionProperties()
.setSessionLocale(getLocale())
.setSessionUser(getUser())
.setFieldNamesInJsonCastEnabled(isFieldNameInJsonCastEnabled(this))
.setLegacyJsonCast(legacyJsonCast)
.setExtraCredentials(identity.getExtraCredentials())
.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ public final class SystemSessionProperties
public static final String ADD_PARTIAL_NODE_FOR_ROW_NUMBER_WITH_LIMIT = "add_partial_node_for_row_number_with_limit";
public static final String REWRITE_CASE_TO_MAP_ENABLED = "rewrite_case_to_map_enabled";
public static final String FIELD_NAMES_IN_JSON_CAST_ENABLED = "field_names_in_json_cast_enabled";
public static final String LEGACY_JSON_CAST = "legacy_json_cast";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CC: @pranjalssh @tdcmeehan @majetideepak @aditi-pandit @spershin

We may need to add logic to check this session property and fail fast if native workers do not support it. Otherwise, we'll get wrong results when processing queries with Prestissimo.

public static final String PULL_EXPRESSION_FROM_LAMBDA_ENABLED = "pull_expression_from_lambda_enabled";
public static final String REWRITE_CONSTANT_ARRAY_CONTAINS_TO_IN_EXPRESSION = "rewrite_constant_array_contains_to_in_expression";
public static final String INFER_INEQUALITY_PREDICATES = "infer_inequality_predicates";
Expand Down Expand Up @@ -1754,6 +1755,11 @@ public SystemSessionProperties(
"Include field names in json output when casting rows",
featuresConfig.isFieldNamesInJsonCastEnabled(),
false),
booleanProperty(
LEGACY_JSON_CAST,
"Keep the legacy json cast behavior, do not reserve the case for field names when casting to row type",
featuresConfig.isLegacyJsonCast(),
false),
booleanProperty(
OPTIMIZE_JOIN_PROBE_FOR_EMPTY_BUILD_RUNTIME,
"Optimize join probe at runtime if build side is empty",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public static Block toArray(ArrayType arrayType, BlockBuilderAppender elementApp
}
BlockBuilder blockBuilder = arrayType.getElementType().createBlockBuilder(null, 20);
while (jsonParser.nextToken() != JsonToken.END_ARRAY) {
elementAppender.append(jsonParser, blockBuilder);
elementAppender.append(jsonParser, blockBuilder, properties);
}
if (jsonParser.nextToken() != null) {
throw new JsonCastException(format("Unexpected trailing token: %s", jsonParser.getText()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ public static Block toMap(MapType mapType, BlockBuilderAppender keyAppender, Blo
HashTable hashTable = new HashTable(mapType.getKeyType(), singleMapBlockBuilder);
int position = 0;
while (jsonParser.nextToken() != JsonToken.END_OBJECT) {
keyAppender.append(jsonParser, singleMapBlockBuilder);
keyAppender.append(jsonParser, singleMapBlockBuilder, properties);
jsonParser.nextToken();
valueAppender.append(jsonParser, singleMapBlockBuilder);
valueAppender.append(jsonParser, singleMapBlockBuilder, properties);

// Duplicate key detection is required even if the JSON is valid.
// For example: CAST(JSON '{"1": 1, "01": 2}' AS MAP<INTEGER, INTEGER>).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@

import java.lang.invoke.MethodHandle;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.common.type.TypeSignature.parseTypeSignature;
import static com.facebook.presto.operator.scalar.ScalarFunctionImplementationChoice.ArgumentProperty.valueTypeArgumentProperty;
Expand All @@ -48,7 +46,6 @@
import static com.facebook.presto.util.JsonUtil.JSON_FACTORY;
import static com.facebook.presto.util.JsonUtil.canCastFromJson;
import static com.facebook.presto.util.JsonUtil.createJsonParser;
import static com.facebook.presto.util.JsonUtil.getFieldNameToIndex;
import static com.facebook.presto.util.JsonUtil.parseJsonToSingleRowBlock;
import static com.facebook.presto.util.JsonUtil.truncateIfNecessaryForErrorMessage;
import static com.facebook.presto.util.Reflection.methodHandle;
Expand All @@ -61,7 +58,7 @@ public class JsonToRowCast
extends SqlOperator
{
public static final JsonToRowCast JSON_TO_ROW = new JsonToRowCast();
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender[].class, Optional.class, SqlFunctionProperties.class, Slice.class);
private static final MethodHandle METHOD_HANDLE = methodHandle(JsonToRowCast.class, "toRow", RowType.class, BlockBuilderAppender[].class, SqlFunctionProperties.class, Slice.class);

private JsonToRowCast()
{
Expand All @@ -83,7 +80,7 @@ public BuiltInScalarFunctionImplementation specialize(BoundVariables boundVariab
BlockBuilderAppender[] fieldAppenders = rowFields.stream()
.map(rowField -> createBlockBuilderAppender(rowField.getType()))
.toArray(BlockBuilderAppender[]::new);
MethodHandle methodHandle = METHOD_HANDLE.bindTo(rowType).bindTo(fieldAppenders).bindTo(getFieldNameToIndex(rowFields));
MethodHandle methodHandle = METHOD_HANDLE.bindTo(rowType).bindTo(fieldAppenders);
return new BuiltInScalarFunctionImplementation(
true,
ImmutableList.of(valueTypeArgumentProperty(RETURN_NULL_ON_NULL)),
Expand All @@ -94,7 +91,6 @@ public BuiltInScalarFunctionImplementation specialize(BoundVariables boundVariab
public static Block toRow(
RowType rowType,
BlockBuilderAppender[] fieldAppenders,
Optional<Map<String, Integer>> fieldNameToIndex,
SqlFunctionProperties properties,
Slice json)
{
Expand All @@ -113,7 +109,8 @@ public static Block toRow(
jsonParser,
(SingleRowBlockWriter) rowBlockBuilder.beginBlockEntry(),
fieldAppenders,
fieldNameToIndex);
rowType,
properties);
rowBlockBuilder.closeEntry();

if (jsonParser.nextToken() != null) {
Expand Down
Loading
Loading