diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/OrcReaderOptions.java b/presto-orc/src/main/java/com/facebook/presto/orc/OrcReaderOptions.java index b27990ed3d7bd..7d4571c192334 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/OrcReaderOptions.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/OrcReaderOptions.java @@ -16,10 +16,12 @@ import io.airlift.units.DataSize; import static com.google.common.base.MoreObjects.toStringHelper; +import static io.airlift.units.DataSize.Unit.GIGABYTE; import static java.util.Objects.requireNonNull; public class OrcReaderOptions { + private static final DataSize DEFAULT_MAX_SLICE_SIZE = new DataSize(1, GIGABYTE); private final DataSize maxMergeDistance; private final DataSize tinyStripeThreshold; private final DataSize maxBlockSize; @@ -27,6 +29,8 @@ public class OrcReaderOptions private final boolean mapNullKeysEnabled; // if the option is set to true, OrcSelectiveReader will append a row number block at the end of the page private final boolean appendRowNumber; + // slice reader will throw if the slice size is larger than this value + private final DataSize maxSliceSize; /** * Read column statistics for flat map columns. 
Usually there are quite a @@ -41,7 +45,8 @@ private OrcReaderOptions( boolean zstdJniDecompressionEnabled, boolean mapNullKeysEnabled, boolean appendRowNumber, - boolean readMapStatistics) + boolean readMapStatistics, + DataSize maxSliceSize) { this.maxMergeDistance = requireNonNull(maxMergeDistance, "maxMergeDistance is null"); this.maxBlockSize = requireNonNull(maxBlockSize, "maxBlockSize is null"); @@ -50,6 +55,7 @@ private OrcReaderOptions( this.mapNullKeysEnabled = mapNullKeysEnabled; this.appendRowNumber = appendRowNumber; this.readMapStatistics = readMapStatistics; + this.maxSliceSize = requireNonNull(maxSliceSize, "maxSliceSize is null"); } public DataSize getMaxMergeDistance() @@ -87,6 +93,11 @@ public boolean readMapStatistics() return readMapStatistics; } + public DataSize getMaxSliceSize() + { + return maxSliceSize; + } + @Override public String toString() { @@ -98,6 +109,7 @@ public String toString() .add("mapNullKeysEnabled", mapNullKeysEnabled) .add("appendRowNumber", appendRowNumber) .add("readMapStatistics", readMapStatistics) + .add("maxSliceSize", maxSliceSize) .toString(); } @@ -115,6 +127,7 @@ public static final class Builder private boolean mapNullKeysEnabled; private boolean appendRowNumber; private boolean readMapStatistics; + private DataSize maxSliceSize = DEFAULT_MAX_SLICE_SIZE; private Builder() {} @@ -160,6 +173,12 @@ public Builder withReadMapStatistics(boolean readMapStatistics) return this; } + public Builder withMaxSliceSize(DataSize maxSliceSize) + { + this.maxSliceSize = maxSliceSize; + return this; + } + public OrcReaderOptions build() { return new OrcReaderOptions( @@ -169,7 +188,8 @@ public OrcReaderOptions build() zstdJniDecompressionEnabled, mapNullKeysEnabled, appendRowNumber, - readMapStatistics); + readMapStatistics, + maxSliceSize); } } } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/OrcRecordReaderOptions.java b/presto-orc/src/main/java/com/facebook/presto/orc/OrcRecordReaderOptions.java index 0b3d186b2c0a1..9a0c05b95eafd 100644 --- 
a/presto-orc/src/main/java/com/facebook/presto/orc/OrcRecordReaderOptions.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/OrcRecordReaderOptions.java @@ -15,6 +15,7 @@ import io.airlift.units.DataSize; +import static com.google.common.base.Preconditions.checkArgument; import static java.util.Objects.requireNonNull; public class OrcRecordReaderOptions @@ -24,10 +25,16 @@ public class OrcRecordReaderOptions private final DataSize maxBlockSize; private final boolean mapNullKeysEnabled; private final boolean appendRowNumber; + private final long maxSliceSize; public OrcRecordReaderOptions(OrcReaderOptions options) { - this(options.getMaxMergeDistance(), options.getTinyStripeThreshold(), options.getMaxBlockSize(), options.mapNullKeysEnabled(), options.appendRowNumber()); + this(options.getMaxMergeDistance(), + options.getTinyStripeThreshold(), + options.getMaxBlockSize(), + options.mapNullKeysEnabled(), + options.appendRowNumber(), + options.getMaxSliceSize()); } public OrcRecordReaderOptions( @@ -35,13 +42,17 @@ public OrcRecordReaderOptions( DataSize tinyStripeThreshold, DataSize maxBlockSize, boolean mapNullKeysEnabled, - boolean appendRowNumber) + boolean appendRowNumber, + DataSize maxSliceSize) { this.maxMergeDistance = requireNonNull(maxMergeDistance, "maxMergeDistance is null"); this.maxBlockSize = requireNonNull(maxBlockSize, "maxBlockSize is null"); this.tinyStripeThreshold = requireNonNull(tinyStripeThreshold, "tinyStripeThreshold is null"); this.mapNullKeysEnabled = mapNullKeysEnabled; this.appendRowNumber = appendRowNumber; + checkArgument(requireNonNull(maxSliceSize, "maxSliceSize is null").toBytes() < Integer.MAX_VALUE, "maxSliceSize cannot be larger than Integer.MAX_VALUE"); + checkArgument(maxSliceSize.toBytes() > 0, "maxSliceSize must be positive"); + this.maxSliceSize = maxSliceSize.toBytes(); } public DataSize getMaxMergeDistance() @@ -68,4 +79,9 @@ public boolean appendRowNumber() { return appendRowNumber; } + + public long getMaxSliceSize() + { + return maxSliceSize; + } } 
diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/BatchStreamReaders.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/BatchStreamReaders.java index 06a24642c20ad..aae184b92fb81 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/BatchStreamReaders.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/BatchStreamReaders.java @@ -49,7 +49,7 @@ public static BatchStreamReader createStreamReader(Type type, StreamDescriptor s case STRING: case VARCHAR: case CHAR: - return new SliceBatchStreamReader(type, streamDescriptor, systemMemoryContext); + return new SliceBatchStreamReader(type, streamDescriptor, systemMemoryContext, options.getMaxSliceSize()); case TIMESTAMP: case TIMESTAMP_MICROSECONDS: boolean enableMicroPrecision = type == TIMESTAMP_MICROSECONDS; diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/LongSelectiveStreamReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/LongSelectiveStreamReader.java index 9c8e8ab82c251..5116a7b496ecc 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/LongSelectiveStreamReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/LongSelectiveStreamReader.java @@ -51,9 +51,10 @@ public LongSelectiveStreamReader( Optional filter, Optional outputType, OrcAggregatedMemoryContext systemMemoryContext, - boolean isLowMemory) + boolean isLowMemory, + long maxSliceSize) { - this.context = new SelectiveReaderContext(streamDescriptor, outputType, filter, systemMemoryContext, isLowMemory); + this.context = new SelectiveReaderContext(streamDescriptor, outputType, filter, systemMemoryContext, isLowMemory, maxSliceSize); } @Override diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveReaderContext.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveReaderContext.java index 88c5719574db9..40b169cacb247 100644 --- 
a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveReaderContext.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveReaderContext.java @@ -40,13 +40,15 @@ public class SelectiveReaderContext private final OrcAggregatedMemoryContext systemMemoryContext; private final boolean isLowMemory; + private final long maxSliceSize; public SelectiveReaderContext( StreamDescriptor streamDescriptor, Optional outputType, Optional filter, OrcAggregatedMemoryContext systemMemoryContext, - boolean isLowMemory) + boolean isLowMemory, + long maxSliceSize) { this.filter = requireNonNull(filter, "filter is null").orElse(null); this.streamDescriptor = requireNonNull(streamDescriptor, "streamDescriptor is null"); @@ -57,6 +59,9 @@ public SelectiveReaderContext( this.isLowMemory = isLowMemory; this.nonDeterministicFilter = this.filter != null && !this.filter.isDeterministic(); this.nullsAllowed = this.filter == null || nonDeterministicFilter || this.filter.testNull(); + checkArgument(maxSliceSize < Integer.MAX_VALUE, "maxSliceSize cannot be larger than Integer.MAX_VALUE"); + checkArgument(maxSliceSize > 0, "maxSliceSize must be positive"); + this.maxSliceSize = maxSliceSize; } public StreamDescriptor getStreamDescriptor() @@ -106,4 +111,9 @@ public boolean isNullsAllowed() { return nullsAllowed; } + + public long getMaxSliceSize() + { + return maxSliceSize; + } } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveStreamReaders.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveStreamReaders.java index dbb3271e6f481..7f8508947da5d 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveStreamReaders.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SelectiveStreamReaders.java @@ -83,7 +83,7 @@ public static SelectiveStreamReader createStreamReader( case DATE: { checkArgument(requiredSubfields.isEmpty(), "Primitive type stream reader doesn't support subfields"); 
verifyStreamType(streamDescriptor, outputType, t -> t instanceof BigintType || t instanceof IntegerType || t instanceof SmallintType || t instanceof DateType); - return new LongSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext, isLowMemory); + return new LongSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext, isLowMemory, options.getMaxSliceSize()); } case FLOAT: { checkArgument(requiredSubfields.isEmpty(), "Float type stream reader doesn't support subfields"); @@ -100,7 +100,7 @@ public static SelectiveStreamReader createStreamReader( case CHAR: checkArgument(requiredSubfields.isEmpty(), "Primitive stream reader doesn't support subfields"); verifyStreamType(streamDescriptor, outputType, t -> t instanceof VarcharType || t instanceof CharType || t instanceof VarbinaryType); - return new SliceSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext, isLowMemory); + return new SliceSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext, isLowMemory, options.getMaxSliceSize()); case TIMESTAMP: case TIMESTAMP_MICROSECONDS: { boolean enableMicroPrecision = outputType.isPresent() && outputType.get() == TIMESTAMP_MICROSECONDS; diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceBatchStreamReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceBatchStreamReader.java index 3142d92074e07..b232a7683196d 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceBatchStreamReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceBatchStreamReader.java @@ -55,13 +55,13 @@ public class SliceBatchStreamReader private final SliceDictionaryBatchStreamReader dictionaryReader; private BatchStreamReader currentReader; - public SliceBatchStreamReader(Type type, StreamDescriptor 
streamDescriptor, OrcAggregatedMemoryContext systemMemoryContext) + public SliceBatchStreamReader(Type type, StreamDescriptor streamDescriptor, OrcAggregatedMemoryContext systemMemoryContext, long maxSliceSize) throws OrcCorruptionException { requireNonNull(type, "type is null"); verifyStreamType(streamDescriptor, type, t -> t instanceof VarcharType || t instanceof CharType || t instanceof VarbinaryType); this.streamDescriptor = requireNonNull(streamDescriptor, "stream is null"); - this.directReader = new SliceDirectBatchStreamReader(streamDescriptor, getMaxCodePointCount(type), isCharType(type)); + this.directReader = new SliceDirectBatchStreamReader(streamDescriptor, getMaxCodePointCount(type), isCharType(type), maxSliceSize); this.dictionaryReader = new SliceDictionaryBatchStreamReader(streamDescriptor, getMaxCodePointCount(type), isCharType(type), systemMemoryContext.newOrcLocalMemoryContext(SliceBatchStreamReader.class.getSimpleName())); } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectBatchStreamReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectBatchStreamReader.java index 7c9ce49879566..e28b6bec25e25 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectBatchStreamReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectBatchStreamReader.java @@ -27,7 +27,6 @@ import com.facebook.presto.orc.stream.LongInputStream; import io.airlift.slice.Slice; import io.airlift.slice.Slices; -import io.airlift.units.DataSize; import org.openjdk.jol.info.ClassLayout; import javax.annotation.Nullable; @@ -45,10 +44,10 @@ import static com.facebook.presto.orc.stream.MissingInputStreamSource.getByteArrayMissingStreamSource; import static com.facebook.presto.orc.stream.MissingInputStreamSource.getLongMissingStreamSource; import static com.google.common.base.MoreObjects.toStringHelper; +import static com.google.common.base.Preconditions.checkArgument; import 
static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Verify.verify; import static io.airlift.slice.Slices.EMPTY_SLICE; -import static io.airlift.units.DataSize.Unit.GIGABYTE; import static java.lang.Math.toIntExact; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -57,11 +56,11 @@ public class SliceDirectBatchStreamReader implements BatchStreamReader { private static final int INSTANCE_SIZE = ClassLayout.parseClass(SliceDirectBatchStreamReader.class).instanceSize(); - private static final int ONE_GIGABYTE = toIntExact(new DataSize(1, GIGABYTE).toBytes()); private final StreamDescriptor streamDescriptor; private final int maxCodePointCount; private final boolean isCharType; + private final long maxSliceSize; private int readOffset; private int nextBatchSize; @@ -80,11 +79,14 @@ public class SliceDirectBatchStreamReader private boolean rowGroupOpen; - public SliceDirectBatchStreamReader(StreamDescriptor streamDescriptor, int maxCodePointCount, boolean isCharType) + public SliceDirectBatchStreamReader(StreamDescriptor streamDescriptor, int maxCodePointCount, boolean isCharType, long maxSliceSize) { this.maxCodePointCount = maxCodePointCount; this.isCharType = isCharType; this.streamDescriptor = requireNonNull(streamDescriptor, "stream is null"); + checkArgument(maxSliceSize < Integer.MAX_VALUE, "maxSliceSize cannot be larger than Integer.MAX_VALUE"); + checkArgument(maxSliceSize > 0, "maxSliceSize must be positive"); + this.maxSliceSize = maxSliceSize; } @Override @@ -176,8 +178,13 @@ public Block readBlock() if (totalLength == 0) { return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector)); } - if (totalLength > ONE_GIGABYTE) { - throw new GenericInternalException(format("Values in column \"%s\" are too large to process for Presto. 
%s column values are larger than 1GB [%s]", streamDescriptor.getFieldName(), currentBatchSize, streamDescriptor.getOrcDataSourceId())); + if (totalLength > maxSliceSize) { + throw new GenericInternalException( + format("Values in column \"%s\" are too large to process for Presto. Requested to read [%s] bytes, when max allowed is [%s] bytes [%s]", + streamDescriptor.getFieldName(), + totalLength, + maxSliceSize, + streamDescriptor.getOrcDataSourceId())); } if (dataStream == null) { throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but data stream is missing"); diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectSelectiveStreamReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectSelectiveStreamReader.java index 7895189df9f31..a531b389f4aec 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectSelectiveStreamReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceDirectSelectiveStreamReader.java @@ -32,7 +32,6 @@ import com.google.common.annotations.VisibleForTesting; import io.airlift.slice.Slice; import io.airlift.slice.Slices; -import io.airlift.units.DataSize; import org.openjdk.jol.info.ClassLayout; import javax.annotation.Nullable; @@ -57,8 +56,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static io.airlift.slice.SizeOf.sizeOf; -import static io.airlift.units.DataSize.Unit.GIGABYTE; -import static java.lang.Math.toIntExact; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -66,7 +63,6 @@ public class SliceDirectSelectiveStreamReader implements SelectiveStreamReader { private static final int INSTANCE_SIZE = ClassLayout.parseClass(SliceDirectSelectiveStreamReader.class).instanceSize(); - private static final int ONE_GIGABYTE = toIntExact(new DataSize(1, GIGABYTE).toBytes()); private 
final SelectiveReaderContext context; private final boolean isCharType; @@ -721,10 +717,12 @@ else if (isNotNull) { } // TODO Do not throw if outputRequired == false - if (totalLength > ONE_GIGABYTE) { + if (totalLength > context.getMaxSliceSize()) { throw new GenericInternalException( - format("Values in column \"%s\" are too large to process for Presto. %s column values are larger than 1GB [%s]", - context.getStreamDescriptor().getFieldName(), positionCount, + format("Values in column \"%s\" are too large to process for Presto. Requested to read [%s] bytes, when max allowed is [%s] bytes [%s]", + context.getStreamDescriptor().getFieldName(), + totalLength, + context.getMaxSliceSize(), context.getStreamDescriptor().getOrcDataSourceId())); } } diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceSelectiveStreamReader.java b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceSelectiveStreamReader.java index 0349d8d7b3f49..1d316c5c4799c 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceSelectiveStreamReader.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/reader/SliceSelectiveStreamReader.java @@ -50,9 +50,15 @@ public class SliceSelectiveStreamReader private SliceDictionarySelectiveReader dictionaryReader; private SelectiveStreamReader currentReader; - public SliceSelectiveStreamReader(StreamDescriptor streamDescriptor, Optional filter, Optional outputType, OrcAggregatedMemoryContext systemMemoryContext, boolean isLowMemory) + public SliceSelectiveStreamReader( + StreamDescriptor streamDescriptor, + Optional filter, + Optional outputType, + OrcAggregatedMemoryContext systemMemoryContext, + boolean isLowMemory, + long maxSliceSize) { - this.context = new SelectiveReaderContext(streamDescriptor, outputType, filter, systemMemoryContext, isLowMemory); + this.context = new SelectiveReaderContext(streamDescriptor, outputType, filter, systemMemoryContext, isLowMemory, maxSliceSize); } public static int 
computeTruncatedLength(Slice slice, int offset, int length, int maxCodePointCount, boolean isCharType) diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/reader/TestMaxSliceReadSize.java b/presto-orc/src/test/java/com/facebook/presto/orc/reader/TestMaxSliceReadSize.java new file mode 100644 index 0000000000000..2d69467df5e77 --- /dev/null +++ b/presto-orc/src/test/java/com/facebook/presto/orc/reader/TestMaxSliceReadSize.java @@ -0,0 +1,127 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.orc.reader; + +import com.facebook.presto.common.GenericInternalException; +import com.facebook.presto.common.Page; +import com.facebook.presto.common.RuntimeStats; +import com.facebook.presto.common.type.SqlVarbinary; +import com.facebook.presto.orc.DwrfKeyProvider; +import com.facebook.presto.orc.FileOrcDataSource; +import com.facebook.presto.orc.OrcDataSource; +import com.facebook.presto.orc.OrcEncoding; +import com.facebook.presto.orc.OrcPredicate; +import com.facebook.presto.orc.OrcReader; +import com.facebook.presto.orc.OrcReaderOptions; +import com.facebook.presto.orc.OrcSelectiveRecordReader; +import com.facebook.presto.orc.OrcTester; +import com.facebook.presto.orc.StorageStripeMetadataSource; +import com.facebook.presto.orc.TempFile; +import com.facebook.presto.orc.cache.StorageOrcFileTailSource; +import com.facebook.presto.orc.metadata.CompressionKind; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.airlift.units.DataSize; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.Optional; + +import static com.facebook.presto.common.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION; +import static com.facebook.presto.orc.NoOpOrcWriterStats.NOOP_WRITER_STATS; +import static com.facebook.presto.orc.NoopOrcAggregatedMemoryContext.NOOP_ORC_AGGREGATED_MEMORY_CONTEXT; +import static com.facebook.presto.orc.OrcTester.Format.DWRF; +import static com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE; +import static io.airlift.units.DataSize.Unit.KILOBYTE; +import static io.airlift.units.DataSize.Unit.MEGABYTE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.expectThrows; + +public class TestMaxSliceReadSize +{ + @Test + public void test() + throws Exception + { + SqlVarbinary value = new SqlVarbinary(new 
byte[10 * 1024]); + + try (TempFile tempFile = new TempFile()) { + OrcTester.writeOrcColumnsPresto( + tempFile.getFile(), + DWRF, + CompressionKind.NONE, + Optional.empty(), + ImmutableList.of(VARBINARY), + ImmutableList.of(ImmutableList.of(value)), + NOOP_WRITER_STATS); + + OrcSelectiveRecordReader readerNoLimits = createReader(tempFile, new DataSize(1, MEGABYTE)); + Page page = readerNoLimits.getNextPage().getLoadedPage(); + assertEquals(page.getPositionCount(), 1); + + OrcSelectiveRecordReader readerBelowThreshold = createReader(tempFile, new DataSize(1, KILOBYTE)); + GenericInternalException exception = expectThrows(GenericInternalException.class, () -> readerBelowThreshold.getNextPage().getLoadedPage()); + assertTrue(exception.getMessage().startsWith("Values in column \"test\" are too large to process for Presto. Requested to read [10240] bytes, when max allowed is [1024] bytes ")); + } + } + + private static OrcSelectiveRecordReader createReader(TempFile tempFile, DataSize maxSliceSize) + throws IOException + { + OrcDataSource orcDataSource = new FileOrcDataSource( + tempFile.getFile(), + new DataSize(1, MEGABYTE), + new DataSize(1, MEGABYTE), + new DataSize(1, MEGABYTE), + true); + + OrcReaderOptions options = OrcReaderOptions.builder() + .withMaxMergeDistance(new DataSize(1, MEGABYTE)) + .withTinyStripeThreshold(new DataSize(1, MEGABYTE)) + .withMaxBlockSize(new DataSize(1, MEGABYTE)) + .withMaxSliceSize(maxSliceSize) + .build(); + + OrcReader orcReader = new OrcReader( + orcDataSource, + OrcEncoding.DWRF, + new StorageOrcFileTailSource(), + new StorageStripeMetadataSource(), + NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, + options, + false, + NO_ENCRYPTION, + DwrfKeyProvider.EMPTY, + new RuntimeStats()); + + return orcReader.createSelectiveRecordReader( + ImmutableMap.of(0, VARBINARY), + ImmutableList.of(0), + ImmutableMap.of(), + ImmutableList.of(), + ImmutableMap.of(), + ImmutableMap.of(), + ImmutableMap.of(), + ImmutableMap.of(), + OrcPredicate.TRUE, + 0, + 
orcDataSource.getSize(), + HIVE_STORAGE_TIME_ZONE, + NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, + Optional.empty(), + 1); + } +}