diff --git a/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestFlatDefinitionLevelDecoder.java b/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestFlatDefinitionLevelDecoder.java index d081d227480f0..92fa0986523c7 100644 --- a/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestFlatDefinitionLevelDecoder.java +++ b/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestFlatDefinitionLevelDecoder.java @@ -21,11 +21,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Random; import static com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.addDLRLEBlock; import static com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.addDLValues; -import static com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.randomValues; +import static com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.fillValues; import static java.lang.Math.min; import static org.testng.Assert.assertEquals; import static org.testng.Assert.fail; @@ -35,19 +34,18 @@ public class TestFlatDefinitionLevelDecoder private static int valueCount; private static int nonNullCount; private static byte[] pageBytes; - private static List expectedValues = new ArrayList<>(); + private static final List expectedValues = new ArrayList<>(); @BeforeClass public void setup() throws IOException { - Random random = new Random(200); RunLengthBitPackingHybridEncoder encoder = TestParquetUtils.getSimpleDLEncoder(); addDLRLEBlock(1, 50, encoder, expectedValues); - addDLValues(randomValues(random, 457, 1), encoder, expectedValues); + addDLValues(fillValues(457, 1), encoder, expectedValues); addDLRLEBlock(0, 37, encoder, expectedValues); - addDLValues(randomValues(random, 186, 1), encoder, expectedValues); + addDLValues(fillValues(186, 1), encoder, expectedValues); valueCount = expectedValues.size(); for (Integer value : expectedValues) { diff --git a/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestParquetUtils.java b/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestParquetUtils.java index 9168934f92133..9d8a6a2818c95 100644 --- a/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestParquetUtils.java +++ b/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestParquetUtils.java @@ -13,7 +13,6 @@ */ package com.facebook.presto.parquet.batchreader.decoders; -import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.parquet.bytes.BytesUtils; @@ -31,7 +30,6 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.Random; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; @@ -80,16 +78,16 @@ public static void addDLValues(Iterator values, RunLengthBitPackingHybr } } - public static Iterator randomValues(Random random, int numValues, int maxValue) + public static Iterator fillValues(int numValues, int maxValue) { List values = new ArrayList<>(); for (int i = 0; i < numValues; i++) { - values.add(random.nextInt(maxValue + 1)); + values.add(maxValue); } return values.iterator(); } - public static byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, Random random, List addedValues) + public static byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, List addedValues, int valueInt, long valueLong, int positiveUpperBoundedInt) { ValuesWriter writer; @@ -103,15 +101,14 @@ public static byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, switch (valueSizeBits) { case 1: { for (int i = 0; i < valueCount; i++) { - int value = random.nextInt(2); - writer.writeInteger(value); - addedValues.add(value); + writer.writeInteger(positiveUpperBoundedInt); + addedValues.add(positiveUpperBoundedInt); } break; } case -1: { for (int i = 0; i < valueCount; i++) { - String valueStr = RandomStringUtils.random(random.nextInt(10), 0, 0, true, true, null, random); + String valueStr = "4nY" + valueCount; byte[] valueUtf8 = valueStr.getBytes(StandardCharsets.UTF_8); writer.writeBytes(Binary.fromConstantByteArray(valueUtf8, 0, valueUtf8.length)); addedValues.add(valueStr); @@ -120,23 +117,21 @@ public static byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, } case 32: { for (int i = 0; i < valueCount; i++) { - int value = random.nextInt(); - writer.writeInteger(value); - addedValues.add(value); + writer.writeInteger(valueInt); + addedValues.add(valueInt); } break; } case 64: { for (int i = 0; i < valueCount; i++) { - long value = random.nextLong(); - writer.writeLong(value); - addedValues.add(value); + writer.writeLong(valueLong); + addedValues.add(valueLong); } break; } case 96: { for (int i = 0; i < valueCount; i++) { - long millisValue = Long.valueOf(random.nextInt(1572281176) * 1000); + long millisValue = positiveUpperBoundedInt * 1000L; NanoTime nanoTime = NanoTimeUtils.getNanoTime(new Timestamp(millisValue), false); writer.writeLong(nanoTime.getTimeOfDayNanos()); writer.writeInteger(nanoTime.getJulianDay()); @@ -146,12 +141,10 @@ public static byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, } case 128: for (int i = 0; i < valueCount; i++) { - long value = random.nextLong(); - writer.writeLong(value); - addedValues.add(value); - value = random.nextLong(); - writer.writeLong(value); - addedValues.add(value); + writer.writeLong(valueLong); + addedValues.add(valueLong); + writer.writeLong(valueLong); + addedValues.add(valueLong); } break; default: @@ -166,19 +159,19 @@ public static byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, } } - public static byte[] generateDictionaryIdPage2048(int maxValue, Random random, List addedValues) + public static byte[] generateDictionaryIdPage2048(int maxValue, List addedValues, int fillerValue) { RunLengthBitPackingHybridEncoder encoder = getDictionaryDataPageEncoder(maxValue); addDLRLEBlock(maxValue / 2, 50, encoder, addedValues); - addDLValues(randomValues(random, 457, maxValue), encoder, addedValues); + addDLValues(fillValues(457, fillerValue), encoder, addedValues); addDLRLEBlock(0, 37, encoder, addedValues); - addDLValues(randomValues(random, 186, maxValue), encoder, addedValues); - addDLValues(randomValues(random, 289, maxValue), encoder, addedValues); + addDLValues(fillValues(186, fillerValue), encoder, addedValues); + addDLValues(fillValues(289, fillerValue), encoder, addedValues); addDLRLEBlock(maxValue - 1, 76, encoder, addedValues); - addDLValues(randomValues(random, 789, maxValue), encoder, addedValues); + addDLValues(fillValues(789, fillerValue), encoder, addedValues); addDLRLEBlock(maxValue - 1, 137, encoder, addedValues); - addDLValues(randomValues(random, 27, maxValue), encoder, addedValues); + addDLValues(fillValues(27, fillerValue), encoder, addedValues); checkState(addedValues.size() == 2048); diff --git a/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestValuesDecoders.java b/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestValuesDecoders.java index c94a68e8d19e3..e42e9af8987da 100644 --- a/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestValuesDecoders.java +++ b/presto-parquet/src/test/java/com/facebook/presto/parquet/batchreader/decoders/TestValuesDecoders.java @@ -52,19 +52,16 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Random; import java.util.stream.Collectors; import static com.facebook.presto.parquet.ParquetEncoding.PLAIN_DICTIONARY; -import static com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.generateDictionaryIdPage2048; -import static com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.generatePlainValuesPage; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.Math.min; -import static org.apache.parquet.bytes.BytesUtils.UTF8; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.parquet.bytes.BytesUtils.getWidthFromMaxInt; import static org.testng.Assert.assertEquals; -public class TestValuesDecoders +public abstract class TestValuesDecoders { private static Int32ValuesDecoder int32Plain(byte[] pageBytes) { @@ -215,7 +212,7 @@ private static void binaryBatchReadWithSkipHelper(int batchSize, int skipSize, i decoder.readIntoBuffer(byteBuffer, 0, offsets, 0, valueBuffer); for (int i = 0; i < readBatchSize; i++) { - byte[] expected = ((String) expectedValues.get(inputOffset + i)).getBytes(UTF8); + byte[] expected = ((String) expectedValues.get(inputOffset + i)).getBytes(UTF_8); byte[] actual = Arrays.copyOfRange(byteBuffer, offsets[i], offsets[i + 1]); assertEquals(expected, actual); } @@ -365,6 +362,10 @@ private static void booleanBatchReadWithSkipHelper(int batchSize, int skipSize, } } + public abstract byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, List addedValues); + + public abstract byte[] generateDictionaryIdPage2048(int maxValue, List addedValues); + @Test public void testInt32Plain() throws IOException @@ -372,7 +373,7 @@ public void testInt32Plain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, 32, new Random(89), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, 32, expectedValues); int32BatchReadWithSkipHelper(valueCount, 0, valueCount, int32Plain(pageBytes), expectedValues); // read all values in one batch int32BatchReadWithSkipHelper(29, 0, valueCount, int32Plain(pageBytes), expectedValues); @@ -388,14 +389,13 @@ public void testInt32Plain() public void testInt32RLEDictionary() throws IOException { - Random random = new Random(83); int valueCount = 2048; int dictionarySize = 29; List dictionary = new ArrayList<>(); List dictionaryIds = new ArrayList<>(); - byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 32, random, dictionary); - byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds); + byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 32, dictionary); + byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, dictionaryIds); List expectedValues = new ArrayList<>(); for (Integer dictionaryId : dictionaryIds) { @@ -421,7 +421,7 @@ public void testBinaryPlain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, -1, new Random(113), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, -1, expectedValues); binaryBatchReadWithSkipHelper(valueCount, 0, valueCount, binaryPlain(pageBytes), expectedValues); // read all values in one batch binaryBatchReadWithSkipHelper(29, 0, valueCount, binaryPlain(pageBytes), expectedValues); @@ -437,14 +437,13 @@ public void testBinaryPlain() public void testBinaryRLEDictionary() throws IOException { - Random random = new Random(83); int valueCount = 2048; int dictionarySize = 29; List dictionary = new ArrayList<>(); List dictionaryIds = new ArrayList<>(); - byte[] dictionaryPage = TestParquetUtils.generatePlainValuesPage(dictionarySize, -1, random, dictionary); - byte[] dataPage = TestParquetUtils.generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds); + byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, -1, dictionary); + byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, dictionaryIds); List expectedValues = new ArrayList<>(); for (Integer dictionaryId : dictionaryIds) { @@ -470,7 +469,7 @@ public void testInt64Plain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, 64, new Random(89), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, 64, expectedValues); int64BatchReadWithSkipHelper(valueCount, 0, valueCount, int64Plain(pageBytes), expectedValues); // read all values in one batch int64BatchReadWithSkipHelper(29, 0, valueCount, int64Plain(pageBytes), expectedValues); @@ -496,14 +495,13 @@ public void testInt64Plain() public void testInt64RLEDictionary() throws IOException { - Random random = new Random(83); int valueCount = 2048; int dictionarySize = 29; List dictionary = new ArrayList<>(); List dictionaryIds = new ArrayList<>(); - byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 64, random, dictionary); - byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds); + byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 64, dictionary); + byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, dictionaryIds); List expectedValues = new ArrayList<>(); for (Integer dictionaryId : dictionaryIds) { @@ -539,7 +537,7 @@ public void testTimestampPlain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, 96, new Random(83), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, 96, expectedValues); timestampBatchReadWithSkipHelper(valueCount, 0, valueCount, timestampPlain(pageBytes), expectedValues); // read all values in one batch timestampBatchReadWithSkipHelper(29, 0, valueCount, timestampPlain(pageBytes), expectedValues); @@ -555,14 +553,13 @@ public void testTimestampPlain() public void testTimestampRLEDictionary() throws IOException { - Random random = new Random(83); int valueCount = 2048; int dictionarySize = 29; List dictionary = new ArrayList<>(); List dictionaryIds = new ArrayList<>(); - byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 96, random, dictionary); - byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds); + byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 96, dictionary); + byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, dictionaryIds); List expectedValues = new ArrayList<>(); for (Integer dictionaryId : dictionaryIds) { @@ -587,7 +584,7 @@ public void testBooleanPlain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, 1, new Random(83), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, 1, expectedValues); booleanBatchReadWithSkipHelper(valueCount, 0, valueCount, booleanPlain(pageBytes), expectedValues); // read all values in one batch booleanBatchReadWithSkipHelper(29, 0, valueCount, booleanPlain(pageBytes), expectedValues); @@ -602,16 +599,12 @@ public void testBooleanPlain() @Test public void testBooleanRLE() { - Random random = new Random(111); int valueCount = 2048; List values = new ArrayList<>(); - byte[] dataPage = generateDictionaryIdPage2048(1, random, values); + byte[] dataPage = generateDictionaryIdPage2048(1, values); - List expectedValues = new ArrayList<>(); - for (Integer value : values) { - expectedValues.add(value.intValue()); - } + List expectedValues = new ArrayList<>(values); booleanBatchReadWithSkipHelper(valueCount, 0, valueCount, booleanRLE(dataPage), expectedValues); booleanBatchReadWithSkipHelper(29, 0, valueCount, booleanRLE(dataPage), expectedValues); @@ -630,7 +623,7 @@ public void testInt32ShortDecimalPlain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, 32, new Random(83), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, 32, expectedValues); int32ShortDecimalBatchReadWithSkipHelper(valueCount, 0, valueCount, int32ShortDecimalPlain(pageBytes), expectedValues); // read all values in one batch int32ShortDecimalBatchReadWithSkipHelper(29, 0, valueCount, int32ShortDecimalPlain(pageBytes), expectedValues); int32ShortDecimalBatchReadWithSkipHelper(89, 0, valueCount, int32ShortDecimalPlain(pageBytes), expectedValues); @@ -648,7 +641,7 @@ public void testInt64ShortDecimalPlain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, 64, new Random(83), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, 64, expectedValues); int64ShortDecimalBatchReadWithSkipHelper(valueCount, 0, valueCount, int64ShortDecimalPlain(pageBytes), expectedValues); // read all values in one batch int64ShortDecimalBatchReadWithSkipHelper(29, 0, valueCount, int64ShortDecimalPlain(pageBytes), expectedValues); int64ShortDecimalBatchReadWithSkipHelper(89, 0, valueCount, int64ShortDecimalPlain(pageBytes), expectedValues); @@ -663,14 +656,13 @@ public void testInt64ShortDecimalPlain() public void testInt32ShortDecimalRLE() throws IOException { - Random random = new Random(83); int valueCount = 2048; int dictionarySize = 29; List dictionary = new ArrayList<>(); List dictionaryIds = new ArrayList<>(); - byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 32, random, dictionary); - byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds); + byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 32, dictionary); + byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, dictionaryIds); List expectedValues = new ArrayList<>(); for (Integer dictionaryId : dictionaryIds) { @@ -693,14 +685,13 @@ public void testInt32ShortDecimalRLE() public void testInt64ShortDecimalRLE() throws IOException { - Random random = new Random(83); int valueCount = 2048; int dictionarySize = 29; List dictionary = new ArrayList<>(); List dictionaryIds = new ArrayList<>(); - byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 64, random, dictionary); - byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds); + byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 64, dictionary); + byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, dictionaryIds); List expectedValues = new ArrayList<>(); for (Integer dictionaryId : dictionaryIds) { @@ -726,7 +717,7 @@ public void testUuidPlainPlain() int valueCount = 2048; List expectedValues = new ArrayList<>(); - byte[] pageBytes = generatePlainValuesPage(valueCount, 128, new Random(83), expectedValues); + byte[] pageBytes = generatePlainValuesPage(valueCount, 128, expectedValues); // page is read assuming in big endian, so we need to flip the bytes around when comparing read values expectedValues = expectedValues.stream() .map(Long.class::cast) @@ -749,13 +740,12 @@ public void testUuidRLEDictionary() throws IOException { int valueCount = 2048; - Random random = new Random(83); int dictionarySize = 29; List dictionary = new ArrayList<>(); List dictionaryIds = new ArrayList<>(); - byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 128, random, dictionary); - byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds); + byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 128, dictionary); + byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, dictionaryIds); List expectedValues = new ArrayList<>(); for (Integer dictionaryId : dictionaryIds) { @@ -780,4 +770,84 @@ public void testUuidRLEDictionary() uuidBatchReadWithSkipHelper(89, 29, valueCount, uuidRle(dataPage, dictionarySize, binaryDictionary), expectedValues); uuidBatchReadWithSkipHelper(1024, 1024, valueCount, uuidRle(dataPage, dictionarySize, binaryDictionary), expectedValues); } + + public static class TestValueDecodersArbitrary + extends TestValuesDecoders + { + public static final int ARBITRARY_VALUE = 237; + + @Override + public byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, List addedValues) + { + int positiveUpperBoundedInt = getPositiveUpperBoundedInt(valueSizeBits); + return TestParquetUtils.generatePlainValuesPage(valueCount, valueSizeBits, addedValues, ARBITRARY_VALUE, ARBITRARY_VALUE * (1L << 31), positiveUpperBoundedInt); + } + + @Override + public byte[] generateDictionaryIdPage2048(int maxValue, List addedValues) + { + return TestParquetUtils.generateDictionaryIdPage2048(maxValue, addedValues, ARBITRARY_VALUE % maxValue); + } + + private int getPositiveUpperBoundedInt(int valueSizeBits) + { + if (valueSizeBits == 1) { + return ARBITRARY_VALUE % 2; + } + if (valueSizeBits == 96) { + return ARBITRARY_VALUE % 1572281176; + } + return ARBITRARY_VALUE; + } + } + + public static class TestValueDecodersLowerBounded + extends TestValuesDecoders + { + @Override + public byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, List addedValues) + { + return TestParquetUtils.generatePlainValuesPage(valueCount, valueSizeBits, addedValues, Integer.MIN_VALUE, 0L, getPositiveUpperBoundedInt()); + } + + private int getPositiveUpperBoundedInt() + { + return 0; + } + + @Override + public byte[] generateDictionaryIdPage2048(int maxValue, List addedValues) + { + return TestParquetUtils.generateDictionaryIdPage2048(maxValue, addedValues, getPositiveUpperBoundedInt()); + } + } + + public static class TestValueDecodersUpperBounded + extends TestValuesDecoders + { + private static int getPositiveUpperBoundedInt(int valueSizeBits) + { + int positiveUpperBoundedInt = Integer.MAX_VALUE; + if (valueSizeBits == 1) { + positiveUpperBoundedInt = 1; + } + if (valueSizeBits == 96) { + positiveUpperBoundedInt = 1572281175; + } + return positiveUpperBoundedInt; + } + + @Override + public byte[] generatePlainValuesPage(int valueCount, int valueSizeBits, List addedValues) + { + int positiveUpperBoundedInt = getPositiveUpperBoundedInt(valueSizeBits); + return TestParquetUtils.generatePlainValuesPage(valueCount, valueSizeBits, addedValues, Integer.MAX_VALUE, Long.MAX_VALUE, positiveUpperBoundedInt); + } + + @Override + public byte[] generateDictionaryIdPage2048(int maxValue, List addedValues) + { + return TestParquetUtils.generateDictionaryIdPage2048(maxValue, addedValues, Math.abs(maxValue)); + } + } }