From 44f012c248df61a4dfb9a572cd718b2783c10e94 Mon Sep 17 00:00:00 2001
From: praveenkrishna
Date: Tue, 5 Sep 2023 16:03:42 +0530
Subject: [PATCH 1/2] Introduce CoercionContext

This abstraction captures all additional information related to coercing a column
---
 .../io/trino/plugin/hive/HivePageSource.java |  7 +--
 .../plugin/hive/HivePageSourceProvider.java  | 13 ++---
 .../plugin/hive/coercions/CoercionUtils.java | 54 +++++++++++--------
 .../hive/coercions/TestTimestampCoercer.java |  3 +-
 4 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java
index 2556bca48fb3..9946d06940f9 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java
@@ -17,6 +17,7 @@
 import io.trino.filesystem.Location;
 import io.trino.plugin.hive.HivePageSourceProvider.BucketAdaptation;
 import io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping;
+import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext;
 import io.trino.plugin.hive.coercions.TypeCoercer;
 import io.trino.plugin.hive.type.TypeInfo;
 import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion;
@@ -78,12 +79,12 @@ public HivePageSource(
             Optional bucketValidator,
             Optional projectionsAdapter,
             TypeManager typeManager,
-            HiveTimestampPrecision timestampPrecision,
+            CoercionContext coercionContext,
             ConnectorPageSource delegate)
     {
         requireNonNull(columnMappings, "columnMappings is null");
         requireNonNull(typeManager, "typeManager is null");
-        requireNonNull(timestampPrecision, "timestampPrecision is null");
+        requireNonNull(coercionContext, "coercionContext is null");
 
         this.delegate = requireNonNull(delegate, "delegate is null");
         this.columnMappings = columnMappings;
@@ -111,7 +112,7 @@ public HivePageSource(
                         .orElse(ImmutableList.of());
                 HiveType fromType = columnMapping.getBaseTypeCoercionFrom().get().getHiveTypeForDereferences(dereferenceIndices).get();
                 HiveType toType = columnMapping.getHiveColumnHandle().getHiveType();
-                coercers.add(createCoercer(typeManager, fromType, toType, timestampPrecision));
+                coercers.add(createCoercer(typeManager, fromType, toType, coercionContext));
             }
             else {
                 coercers.add(Optional.empty());
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java
index 709cd854d971..22ec145f3737 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java
@@ -23,6 +23,7 @@
 import io.trino.plugin.hive.HiveSplit.BucketConversion;
 import io.trino.plugin.hive.HiveSplit.BucketValidation;
 import io.trino.plugin.hive.acid.AcidTransaction;
+import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext;
 import io.trino.plugin.hive.type.TypeInfo;
 import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion;
 import io.trino.spi.TrinoException;
@@ -190,10 +191,10 @@ public static Optional createHivePageSource(
         Optional bucketAdaptation = createBucketAdaptation(bucketConversion, tableBucketNumber, regularAndInterimColumnMappings);
         Optional bucketValidator = createBucketValidator(path, bucketValidation, tableBucketNumber, regularAndInterimColumnMappings);
 
-        HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
+        CoercionContext
coercionContext = new CoercionContext(getTimestampPrecision(session)); for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) { - List desiredColumns = toColumnHandles(regularAndInterimColumnMappings, typeManager, timestampPrecision); + List desiredColumns = toColumnHandles(regularAndInterimColumnMappings, typeManager, coercionContext); Optional readerWithProjections = pageSourceFactory.createPageSource( session, @@ -224,7 +225,7 @@ public static Optional createHivePageSource( bucketValidator, adapter, typeManager, - timestampPrecision, + coercionContext, pageSource)); } } @@ -473,7 +474,7 @@ public static List extractRegularAndInterimColumnMappings(List toColumnHandles(List regularColumnMappings, TypeManager typeManager, HiveTimestampPrecision timestampPrecision) + public static List toColumnHandles(List regularColumnMappings, TypeManager typeManager, CoercionContext coercionContext) { return regularColumnMappings.stream() .map(columnMapping -> { @@ -489,14 +490,14 @@ public static List toColumnHandles(List regular projectedColumn.getDereferenceIndices(), projectedColumn.getDereferenceNames(), fromHiveType, - createTypeFromCoercer(typeManager, fromHiveType, columnHandle.getHiveType(), timestampPrecision)); + createTypeFromCoercer(typeManager, fromHiveType, columnHandle.getHiveType(), coercionContext)); }); return new HiveColumnHandle( columnHandle.getBaseColumnName(), columnHandle.getBaseHiveColumnIndex(), fromHiveTypeBase, - createTypeFromCoercer(typeManager, fromHiveTypeBase, columnHandle.getBaseHiveType(), timestampPrecision), + createTypeFromCoercer(typeManager, fromHiveTypeBase, columnHandle.getBaseHiveType(), coercionContext), newColumnProjectionInfo, columnHandle.getColumnType(), columnHandle.getComment()); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java index edc691de3c34..363ec7c3ecf8 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java @@ -71,21 +71,21 @@ public final class CoercionUtils { private CoercionUtils() {} - public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, HiveTimestampPrecision timestampPrecision) + public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, CoercionContext coercionContext) { - return createCoercer(typeManager, fromHiveType, toHiveType, timestampPrecision) + return createCoercer(typeManager, fromHiveType, toHiveType, coercionContext) .map(TypeCoercer::getFromType) - .orElseGet(() -> fromHiveType.getType(typeManager, timestampPrecision)); + .orElseGet(() -> fromHiveType.getType(typeManager, coercionContext.timestampPrecision())); } - public static Optional> createCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, HiveTimestampPrecision timestampPrecision) + public static Optional> createCoercer(TypeManager typeManager, HiveType fromHiveType, HiveType toHiveType, CoercionContext coercionContext) { if (fromHiveType.equals(toHiveType)) { return Optional.empty(); } - Type fromType = fromHiveType.getType(typeManager, timestampPrecision); - Type toType = toHiveType.getType(typeManager, timestampPrecision); + Type fromType = fromHiveType.getType(typeManager, coercionContext.timestampPrecision()); + Type toType = toHiveType.getType(typeManager, 
coercionContext.timestampPrecision()); if (toType instanceof VarcharType toVarcharType && (fromHiveType.equals(HIVE_BYTE) || fromHiveType.equals(HIVE_SHORT) || fromHiveType.equals(HIVE_INT) || fromHiveType.equals(HIVE_LONG))) { return Optional.of(new IntegerNumberToVarcharCoercer<>(fromType, toVarcharType)); @@ -152,14 +152,14 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH typeManager, (ListTypeInfo) fromHiveType.getTypeInfo(), (ListTypeInfo) toHiveType.getTypeInfo(), - timestampPrecision); + coercionContext); } if ((fromType instanceof MapType) && (toType instanceof MapType)) { return createCoercerForMap( typeManager, (MapTypeInfo) fromHiveType.getTypeInfo(), (MapTypeInfo) toHiveType.getTypeInfo(), - timestampPrecision); + coercionContext); } if ((fromType instanceof RowType) && (toType instanceof RowType)) { HiveType fromHiveTypeStruct = (fromHiveType.getCategory() == Category.UNION) ? HiveType.toHiveType(fromType) : fromHiveType; @@ -169,7 +169,7 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH typeManager, (StructTypeInfo) fromHiveTypeStruct.getTypeInfo(), (StructTypeInfo) toHiveTypeStruct.getTypeInfo(), - timestampPrecision); + coercionContext); } throw new TrinoException(NOT_SUPPORTED, format("Unsupported coercion from %s to %s", fromHiveType, toHiveType)); @@ -196,12 +196,12 @@ public static boolean narrowerThan(CharType first, CharType second) TypeManager typeManager, ListTypeInfo fromListTypeInfo, ListTypeInfo toListTypeInfo, - HiveTimestampPrecision timestampPrecision) + CoercionContext coercionContext) { HiveType fromElementHiveType = HiveType.valueOf(fromListTypeInfo.getListElementTypeInfo().getTypeName()); HiveType toElementHiveType = HiveType.valueOf(toListTypeInfo.getListElementTypeInfo().getTypeName()); - return createCoercer(typeManager, fromElementHiveType, toElementHiveType, timestampPrecision) + return createCoercer(typeManager, fromElementHiveType, toElementHiveType, coercionContext) .map(elementCoercer -> new ListCoercer(new ArrayType(elementCoercer.getFromType()), new ArrayType(elementCoercer.getToType()), elementCoercer)); } @@ -209,22 +209,22 @@ public static boolean narrowerThan(CharType first, CharType second) TypeManager typeManager, MapTypeInfo fromMapTypeInfo, MapTypeInfo toMapTypeInfo, - HiveTimestampPrecision timestampPrecision) + CoercionContext coercionContext) { HiveType fromKeyHiveType = HiveType.valueOf(fromMapTypeInfo.getMapKeyTypeInfo().getTypeName()); HiveType fromValueHiveType = HiveType.valueOf(fromMapTypeInfo.getMapValueTypeInfo().getTypeName()); HiveType toKeyHiveType = HiveType.valueOf(toMapTypeInfo.getMapKeyTypeInfo().getTypeName()); HiveType toValueHiveType = HiveType.valueOf(toMapTypeInfo.getMapValueTypeInfo().getTypeName()); - Optional> keyCoercer = createCoercer(typeManager, fromKeyHiveType, toKeyHiveType, timestampPrecision); - Optional> valueCoercer = createCoercer(typeManager, fromValueHiveType, toValueHiveType, timestampPrecision); + Optional> keyCoercer = createCoercer(typeManager, fromKeyHiveType, toKeyHiveType, coercionContext); + Optional> valueCoercer = createCoercer(typeManager, fromValueHiveType, toValueHiveType, coercionContext); MapType fromType = new MapType( - keyCoercer.map(TypeCoercer::getFromType).orElseGet(() -> fromKeyHiveType.getType(typeManager, timestampPrecision)), - valueCoercer.map(TypeCoercer::getFromType).orElseGet(() -> fromValueHiveType.getType(typeManager, timestampPrecision)), + keyCoercer.map(TypeCoercer::getFromType).orElseGet(() -> 
fromKeyHiveType.getType(typeManager, coercionContext.timestampPrecision())), + valueCoercer.map(TypeCoercer::getFromType).orElseGet(() -> fromValueHiveType.getType(typeManager, coercionContext.timestampPrecision())), typeManager.getTypeOperators()); MapType toType = new MapType( - keyCoercer.map(TypeCoercer::getToType).orElseGet(() -> toKeyHiveType.getType(typeManager, timestampPrecision)), - valueCoercer.map(TypeCoercer::getToType).orElseGet(() -> toValueHiveType.getType(typeManager, timestampPrecision)), + keyCoercer.map(TypeCoercer::getToType).orElseGet(() -> toKeyHiveType.getType(typeManager, coercionContext.timestampPrecision())), + valueCoercer.map(TypeCoercer::getToType).orElseGet(() -> toValueHiveType.getType(typeManager, coercionContext.timestampPrecision())), typeManager.getTypeOperators()); return Optional.of(new MapCoercer(fromType, toType, keyCoercer, valueCoercer)); @@ -234,7 +234,7 @@ public static boolean narrowerThan(CharType first, CharType second) TypeManager typeManager, StructTypeInfo fromStructTypeInfo, StructTypeInfo toStructTypeInfo, - HiveTimestampPrecision timestampPrecision) + CoercionContext coercionContext) { ImmutableList.Builder>> coercers = ImmutableList.builder(); ImmutableList.Builder fromField = ImmutableList.builder(); @@ -248,20 +248,20 @@ public static boolean narrowerThan(CharType first, CharType second) if (i >= fromStructFieldName.size()) { toField.add(new Field( Optional.of(toStructFieldNames.get(i)), - toStructFieldType.getType(typeManager, timestampPrecision))); + toStructFieldType.getType(typeManager, coercionContext.timestampPrecision()))); coercers.add(Optional.empty()); } else { HiveType fromStructFieldType = HiveType.valueOf(fromStructTypeInfo.getAllStructFieldTypeInfos().get(i).getTypeName()); - Optional> coercer = createCoercer(typeManager, fromStructFieldType, toStructFieldType, timestampPrecision); + Optional> coercer = createCoercer(typeManager, fromStructFieldType, toStructFieldType, coercionContext); fromField.add(new Field( Optional.of(fromStructFieldName.get(i)), - coercer.map(TypeCoercer::getFromType).orElseGet(() -> fromStructFieldType.getType(typeManager, timestampPrecision)))); + coercer.map(TypeCoercer::getFromType).orElseGet(() -> fromStructFieldType.getType(typeManager, coercionContext.timestampPrecision())))); toField.add(new Field( Optional.of(toStructFieldNames.get(i)), - coercer.map(TypeCoercer::getToType).orElseGet(() -> toStructFieldType.getType(typeManager, timestampPrecision)))); + coercer.map(TypeCoercer::getToType).orElseGet(() -> toStructFieldType.getType(typeManager, coercionContext.timestampPrecision())))); coercers.add(coercer); } @@ -394,4 +394,12 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos throw new UnsupportedOperationException("Not supported"); } } + + public record CoercionContext(HiveTimestampPrecision timestampPrecision) + { + public CoercionContext + { + requireNonNull(timestampPrecision, "timestampPrecision is null"); + } + } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java index 0bd6f48be96b..6fe02d0bbd31 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive.coercions; import io.trino.plugin.hive.HiveTimestampPrecision; +import 
io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; import io.trino.spi.type.LongTimestamp; @@ -221,7 +222,7 @@ public static void assertVarcharToLongTimestampCoercions(Type fromType, Object v public static void assertCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue, HiveTimestampPrecision timestampPrecision) { - Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), timestampPrecision).orElseThrow() + Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), new CoercionContext(timestampPrecision)).orElseThrow() .apply(nativeValueToBlock(fromType, valueToBeCoerced)); assertThat(blockToNativeValue(toType, coercedValue)) .isEqualTo(expectedValue); From 142e6c91a97e29377ae52ccfae38ff35e1361704 Mon Sep 17 00:00:00 2001 From: praveenkrishna Date: Wed, 16 Aug 2023 16:15:51 +0530 Subject: [PATCH 2/2] Add support for double to varchar coercion in hive tables --- .../plugin/hive/HivePageSourceProvider.java | 5 +- .../plugin/hive/coercions/CoercionUtils.java | 5 +- .../coercions/DoubleToVarcharCoercer.java | 56 ++++++++++++ .../plugin/hive/orc/OrcTypeTranslator.java | 5 ++ .../plugin/hive/util/HiveCoercionPolicy.java | 8 +- .../TestDoubleToVarcharCoercions.java | 89 +++++++++++++++++++ .../hive/coercions/TestTimestampCoercer.java | 2 +- .../product/hive/BaseTestHiveCoercion.java | 27 ++++++ .../TestHiveCoercionOnPartitionedTable.java | 3 + .../TestHiveCoercionOnUnpartitionedTable.java | 3 + 10 files changed, 199 insertions(+), 4 deletions(-) create mode 100644 plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DoubleToVarcharCoercer.java create mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index 22ec145f3737..113c17480f31 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -69,6 +69,7 @@ import static io.trino.plugin.hive.coercions.CoercionUtils.createTypeFromCoercer; import static io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter; import static io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter; +import static io.trino.plugin.hive.util.HiveClassNames.ORC_SERDE_CLASS; import static io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName; import static io.trino.plugin.hive.util.HiveUtil.getInputFormatName; import static io.trino.plugin.hive.util.HiveUtil.getPrefilledColumnValue; @@ -191,7 +192,9 @@ public static Optional createHivePageSource( Optional bucketAdaptation = createBucketAdaptation(bucketConversion, tableBucketNumber, regularAndInterimColumnMappings); Optional bucketValidator = createBucketValidator(path, bucketValidation, tableBucketNumber, regularAndInterimColumnMappings); - CoercionContext coercionContext = new CoercionContext(getTimestampPrecision(session)); + // Apache Hive reads Double.NaN as null when coerced to varchar for ORC file format + boolean treatNaNAsNull = ORC_SERDE_CLASS.equals(getDeserializerClassName(schema)); + CoercionContext coercionContext = new CoercionContext(getTimestampPrecision(session), treatNaNAsNull); for (HivePageSourceFactory pageSourceFactory : 
pageSourceFactories) { List desiredColumns = toColumnHandles(regularAndInterimColumnMappings, typeManager, coercionContext); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java index 363ec7c3ecf8..6272ea495f82 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java @@ -147,6 +147,9 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH if (fromType instanceof TimestampType && toType instanceof VarcharType varcharType) { return Optional.of(new TimestampCoercer.LongTimestampToVarcharCoercer(TIMESTAMP_NANOS, varcharType)); } + if (fromType == DOUBLE && toType instanceof VarcharType toVarcharType) { + return Optional.of(new DoubleToVarcharCoercer(toVarcharType, coercionContext.treatNaNAsNull())); + } if ((fromType instanceof ArrayType) && (toType instanceof ArrayType)) { return createCoercerForList( typeManager, @@ -395,7 +398,7 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos } } - public record CoercionContext(HiveTimestampPrecision timestampPrecision) + public record CoercionContext(HiveTimestampPrecision timestampPrecision, boolean treatNaNAsNull) { public CoercionContext { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DoubleToVarcharCoercer.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DoubleToVarcharCoercer.java new file mode 100644 index 000000000000..2d0e13576eb1 --- /dev/null +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DoubleToVarcharCoercer.java @@ -0,0 +1,56 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.hive.coercions; + +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import io.trino.spi.TrinoException; +import io.trino.spi.block.Block; +import io.trino.spi.block.BlockBuilder; +import io.trino.spi.type.DoubleType; +import io.trino.spi.type.VarcharType; + +import static io.airlift.slice.SliceUtf8.countCodePoints; +import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static java.lang.String.format; + +public class DoubleToVarcharCoercer + extends TypeCoercer +{ + private final boolean treatNaNAsNull; + + public DoubleToVarcharCoercer(VarcharType toType, boolean treatNaNAsNull) + { + super(DOUBLE, toType); + this.treatNaNAsNull = treatNaNAsNull; + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + double doubleValue = DOUBLE.getDouble(block, position); + + if (Double.isNaN(doubleValue) && treatNaNAsNull) { + blockBuilder.appendNull(); + return; + } + + Slice converted = Slices.utf8Slice(Double.toString(doubleValue)); + if (!toType.isUnbounded() && countCodePoints(converted) > toType.getBoundedLength()) { + throw new TrinoException(INVALID_ARGUMENTS, format("Varchar representation of %s exceeds %s bounds", doubleValue, toType)); + } + toType.writeSlice(blockBuilder, converted); + } +} diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java index 1167255a7711..dd3948cc0683 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive.orc; import io.trino.orc.metadata.OrcType.OrcTypeKind; +import io.trino.plugin.hive.coercions.DoubleToVarcharCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToLongTimestampCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToShortTimestampCoercer; @@ -24,6 +25,7 @@ import java.util.Optional; +import static io.trino.orc.metadata.OrcType.OrcTypeKind.DOUBLE; import static io.trino.orc.metadata.OrcType.OrcTypeKind.STRING; import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP; import static io.trino.orc.metadata.OrcType.OrcTypeKind.VARCHAR; @@ -45,6 +47,9 @@ private OrcTypeTranslator() {} } return Optional.of(new VarcharToLongTimestampCoercer(createUnboundedVarcharType(), timestampType)); } + if (fromOrcType == DOUBLE && toTrinoType instanceof VarcharType varcharType) { + return Optional.of(new DoubleToVarcharCoercer(varcharType, true)); + } return Optional.empty(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java index 927c1b899e19..8eb084934f07 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java @@ -70,7 +70,13 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType, HiveTimest return toType instanceof CharType; } if (toType instanceof VarcharType) { - return fromHiveType.equals(HIVE_BYTE) || fromHiveType.equals(HIVE_SHORT) || fromHiveType.equals(HIVE_INT) || fromHiveType.equals(HIVE_LONG) || 
fromHiveType.equals(HIVE_TIMESTAMP) || fromType instanceof DecimalType; + return fromHiveType.equals(HIVE_BYTE) || + fromHiveType.equals(HIVE_SHORT) || + fromHiveType.equals(HIVE_INT) || + fromHiveType.equals(HIVE_LONG) || + fromHiveType.equals(HIVE_TIMESTAMP) || + fromHiveType.equals(HIVE_DOUBLE) || + fromType instanceof DecimalType; } if (fromHiveType.equals(HIVE_BYTE)) { return toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java new file mode 100644 index 000000000000..526c085e463e --- /dev/null +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDoubleToVarcharCoercions.java @@ -0,0 +1,89 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.hive.coercions; + +import io.airlift.slice.Slices; +import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext; +import io.trino.spi.TrinoException; +import io.trino.spi.block.Block; +import io.trino.spi.type.Type; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.stream.Stream; + +import static io.trino.plugin.hive.HiveTimestampPrecision.DEFAULT_PRECISION; +import static io.trino.plugin.hive.HiveType.toHiveType; +import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer; +import static io.trino.spi.predicate.Utils.blockToNativeValue; +import static io.trino.spi.predicate.Utils.nativeValueToBlock; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; +import static io.trino.spi.type.VarcharType.createVarcharType; +import static io.trino.testing.DataProviders.cartesianProduct; +import static io.trino.testing.DataProviders.toDataProvider; +import static io.trino.testing.DataProviders.trueFalse; +import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class TestDoubleToVarcharCoercions +{ + @Test(dataProvider = "doubleValues") + public void testDoubleToVarcharCoercions(Double doubleValue, boolean treatNaNAsNull) + { + assertCoercions(DOUBLE, doubleValue, createUnboundedVarcharType(), Slices.utf8Slice(doubleValue.toString()), treatNaNAsNull); + } + + @Test(dataProvider = "doubleValues") + public void testDoubleSmallerVarcharCoercions(Double doubleValue, boolean treatNaNAsNull) + { + assertThatThrownBy(() -> assertCoercions(DOUBLE, doubleValue, createVarcharType(1), doubleValue.toString(), treatNaNAsNull)) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Varchar representation of %s exceeds varchar(1) bounds", doubleValue); + } + + @Test + public void testNaNToVarcharCoercions() + { + assertCoercions(DOUBLE, Double.NaN, 
createUnboundedVarcharType(), null, true); + + assertCoercions(DOUBLE, Double.NaN, createUnboundedVarcharType(), Slices.utf8Slice("NaN"), false); + assertThatThrownBy(() -> assertCoercions(DOUBLE, Double.NaN, createVarcharType(1), "NaN", false)) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Varchar representation of NaN exceeds varchar(1) bounds"); + } + + @DataProvider + public Object[][] doubleValues() + { + return cartesianProduct( + Stream.of( + Double.NEGATIVE_INFINITY, + Double.MIN_VALUE, + Double.MAX_VALUE, + Double.POSITIVE_INFINITY, + Double.parseDouble("123456789.12345678")) + .collect(toDataProvider()), + trueFalse()); + } + + public static void assertCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue, boolean treatNaNAsNull) + { + Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), new CoercionContext(DEFAULT_PRECISION, treatNaNAsNull)).orElseThrow() + .apply(nativeValueToBlock(fromType, valueToBeCoerced)); + assertThat(blockToNativeValue(toType, coercedValue)) + .isEqualTo(expectedValue); + } +} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java index 6fe02d0bbd31..cb1872cc79e8 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java @@ -222,7 +222,7 @@ public static void assertVarcharToLongTimestampCoercions(Type fromType, Object v public static void assertCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue, HiveTimestampPrecision timestampPrecision) { - Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), new CoercionContext(timestampPrecision)).orElseThrow() + Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), new CoercionContext(timestampPrecision, false)).orElseThrow() .apply(nativeValueToBlock(fromType, valueToBeCoerced)); assertThat(blockToNativeValue(toType, coercedValue)) .isEqualTo(expectedValue); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java index 5302bab0bcb3..324d2e0f5f64 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java @@ -106,6 +106,9 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition) "bigint_to_varchar", "float_to_double", "double_to_float", + "double_to_string", + "double_to_bounded_varchar", + "double_infinity_to_string", "shortdecimal_to_shortdecimal", "shortdecimal_to_longdecimal", "longdecimal_to_shortdecimal", @@ -167,6 +170,9 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " 12345, " + " REAL '0.5', " + " DOUBLE '0.5', " + + " DOUBLE '12345.12345', " + + " DOUBLE '12345.12345', " + + " DOUBLE 'Infinity' ," + " DECIMAL '12345678.12', " + " DECIMAL '12345678.12', " + " DECIMAL '12345678.123456123456', " + @@ -202,6 +208,9 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " -12345, " + " REAL '-1.5', " + " DOUBLE '-1.5', " + + " DOUBLE 'NaN', " + + " DOUBLE 
'-12345.12345', " + + " DOUBLE '-Infinity' ," + " DECIMAL '-12345678.12', " + " DECIMAL '-12345678.12', " + " DECIMAL '-12345678.123456123456', " + @@ -231,6 +240,7 @@ protected void insertTableRows(String tableName, String floatToDoubleType) protected Map> expectedValuesForEngineProvider(Engine engine, String tableName, String decimalToFloatVal, String floatToDecimalVal) { String hiveValueForCaseChangeField; + String coercedNaN = "NaN"; Predicate isFormat = formatName -> tableName.toLowerCase(ENGLISH).contains(formatName); if (Stream.of("rctext", "textfile", "sequencefile").anyMatch(isFormat)) { hiveValueForCaseChangeField = "\"lower2uppercase\":2"; @@ -242,6 +252,11 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { hiveValueForCaseChangeField = "\"LOWER2UPPERCASE\":2"; } + // Apache Hive reads Double.NaN as null when coerced to varchar for ORC file format + if (isFormat.test("orc")) { + coercedNaN = null; + } + return ImmutableMap.>builder() .put("row_to_row", ImmutableList.of( engine == Engine.TRINO ? @@ -322,6 +337,9 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { 0.5, -1.5)) .put("double_to_float", ImmutableList.of(0.5, -1.5)) + .put("double_to_string", Arrays.asList("12345.12345", coercedNaN)) + .put("double_to_bounded_varchar", ImmutableList.of("12345.12345", "-12345.12345")) + .put("double_infinity_to_string", ImmutableList.of("Infinity", "-Infinity")) .put("shortdecimal_to_shortdecimal", ImmutableList.of( new BigDecimal("12345678.1200"), new BigDecimal("-12345678.1200"))) @@ -751,6 +769,9 @@ private void assertProperAlteredTableSchema(String tableName) row("bigint_to_varchar", "varchar"), row("float_to_double", "double"), row("double_to_float", floatType), + row("double_to_string", "varchar"), + row("double_to_bounded_varchar", "varchar(12)"), + row("double_infinity_to_string", "varchar"), row("shortdecimal_to_shortdecimal", "decimal(18,4)"), row("shortdecimal_to_longdecimal", "decimal(20,4)"), row("longdecimal_to_shortdecimal", "decimal(12,2)"), @@ -802,6 +823,9 @@ private void assertColumnTypes( .put("bigint_to_varchar", VARCHAR) .put("float_to_double", DOUBLE) .put("double_to_float", floatType) + .put("double_to_string", VARCHAR) + .put("double_to_bounded_varchar", VARCHAR) + .put("double_infinity_to_string", VARCHAR) .put("shortdecimal_to_shortdecimal", DECIMAL) .put("shortdecimal_to_longdecimal", DECIMAL) .put("longdecimal_to_shortdecimal", DECIMAL) @@ -852,6 +876,9 @@ private static void alterTableColumnTypes(String tableName) onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN bigint_to_varchar bigint_to_varchar string", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN float_to_double float_to_double double", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_float double_to_float %s", tableName, floatType)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_string double_to_string string", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_bounded_varchar double_to_bounded_varchar varchar(12)", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_infinity_to_string double_infinity_to_string string", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN shortdecimal_to_shortdecimal shortdecimal_to_shortdecimal DECIMAL(18,4)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN shortdecimal_to_longdecimal shortdecimal_to_longdecimal DECIMAL(20,4)", tableName)); 
 onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN longdecimal_to_shortdecimal longdecimal_to_shortdecimal DECIMAL(12,2)", tableName));
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java
index 813894c18330..183701358f3c 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java
@@ -112,6 +112,9 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui
                 " bigint_to_varchar BIGINT," +
                 " float_to_double " + floatType + "," +
                 " double_to_float DOUBLE," +
+                " double_to_string DOUBLE," +
+                " double_to_bounded_varchar DOUBLE," +
+                " double_infinity_to_string DOUBLE," +
                 " shortdecimal_to_shortdecimal DECIMAL(10,2)," +
                 " shortdecimal_to_longdecimal DECIMAL(10,2)," +
                 " longdecimal_to_shortdecimal DECIMAL(20,12)," +
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java
index 358fb5836fb4..2943332df643 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java
@@ -61,6 +61,9 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui
                 bigint_to_varchar BIGINT,
                 float_to_double FLOAT,
                 double_to_float DOUBLE,
+                double_to_string DOUBLE,
+                double_to_bounded_varchar DOUBLE,
+                double_infinity_to_string DOUBLE,
                 shortdecimal_to_shortdecimal DECIMAL(10,2),
                 shortdecimal_to_longdecimal DECIMAL(10,2),
                 longdecimal_to_shortdecimal DECIMAL(20,12),