Skip to content

Commit

Permalink
Remove Stats and Jmx dependency from presto-orc
Browse files Browse the repository at this point in the history
presto-orc for WriterStats used the OrcWriterFlushStats. This brings in
the JMX and stats dependency. Our team uses the orc as file reader and
writer. Due to recent introduction of drift dependency in airlift-stats
our binary size increased. Moving the OrcWriterStats to presto-hive
to avoid the dependency in presto-orc. In presto hive only class name
is used in the JMX export path, so this will have no change in the JMX.
  • Loading branch information
Arunachalam Thirupathi committed Apr 29, 2022
1 parent 0f24802 commit c5c0866
Show file tree
Hide file tree
Showing 22 changed files with 96 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import com.facebook.presto.orc.OrcDataSource;
import com.facebook.presto.orc.OrcDataSourceId;
import com.facebook.presto.orc.OrcEncoding;
import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.orc.WriterEncryptionGroup;
import com.facebook.presto.orc.metadata.CompressionKind;
import com.facebook.presto.orc.metadata.KeyProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc;
package com.facebook.presto.hive;

import com.facebook.airlift.stats.DistributionStat;
import org.weakref.jmx.Managed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc;
package com.facebook.presto.hive;

import com.facebook.presto.orc.FlushReason;
import com.facebook.presto.orc.WriterStats;
import com.facebook.presto.orc.metadata.StripeInformation;
import org.weakref.jmx.Managed;
import org.weakref.jmx.Nested;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
import com.facebook.presto.common.io.DataSink;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.orc.DefaultOrcWriterFlushPolicy;
import com.facebook.presto.orc.NoOpOrcWriterStats;
import com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode;
import com.facebook.presto.orc.OrcWriter;
import com.facebook.presto.orc.OrcWriterOptions;
import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.spi.PrestoException;
import com.google.common.collect.ImmutableMap;
import io.airlift.units.DataSize;
Expand Down Expand Up @@ -100,7 +100,7 @@ private static OrcWriter createOrcFileWriter(DataSink sink, List<Type> types)
UTC,
false,
OrcWriteValidationMode.BOTH,
new OrcWriterStats());
new NoOpOrcWriterStats());
}
catch (NotSupportedException e) {
throw new PrestoException(NOT_SUPPORTED, e.getMessage(), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@
import com.facebook.presto.hive.pagefile.PageWriter;
import com.facebook.presto.hive.parquet.ParquetPageSourceFactory;
import com.facebook.presto.hive.rcfile.RcFilePageSourceFactory;
import com.facebook.presto.orc.NoOpOrcWriterStats;
import com.facebook.presto.orc.OrcWriter;
import com.facebook.presto.orc.OrcWriterOptions;
import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.orc.StorageStripeMetadataSource;
import com.facebook.presto.orc.StripeMetadataSourceFactory;
import com.facebook.presto.orc.cache.StorageOrcFileTailSource;
Expand Down Expand Up @@ -610,7 +610,7 @@ public PrestoOrcFormatWriter(File targetFile, List<String> columnNames, List<Typ
hiveStorageTimeZone,
false,
BOTH,
new OrcWriterStats());
new NoOpOrcWriterStats());
}

@Override
Expand Down Expand Up @@ -649,7 +649,7 @@ public PrestoDwrfFormatWriter(File targetFile, List<String> columnNames, List<Ty
hiveStorageTimeZone,
false,
BOTH,
new OrcWriterStats());
new NoOpOrcWriterStats());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
import com.facebook.presto.hive.orc.HdfsOrcDataSource;
import com.facebook.presto.orc.DefaultOrcWriterFlushPolicy;
import com.facebook.presto.orc.DwrfEncryptionProvider;
import com.facebook.presto.orc.NoOpOrcWriterStats;
import com.facebook.presto.orc.OrcDataSource;
import com.facebook.presto.orc.OrcDataSourceId;
import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.parquet.writer.ParquetWriterOptions;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PrestoException;
Expand Down Expand Up @@ -78,7 +78,7 @@ public class IcebergFileWriterFactory
private final TypeManager typeManager;
private final FileFormatDataSourceStats readStats;
private final NodeVersion nodeVersion;
private final OrcWriterStats orcWriterStats = new OrcWriterStats();
private final NoOpOrcWriterStats orcWriterStats = new NoOpOrcWriterStats();
private final OrcFileWriterConfig orcFileWriterConfig;
private final DwrfEncryptionProvider dwrfEncryptionProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
import com.facebook.presto.hive.OrcFileWriter;
import com.facebook.presto.orc.DwrfEncryptionProvider;
import com.facebook.presto.orc.DwrfWriterEncryption;
import com.facebook.presto.orc.NoOpOrcWriterStats;
import com.facebook.presto.orc.OrcDataSource;
import com.facebook.presto.orc.OrcEncoding;
import com.facebook.presto.orc.OrcWriteValidation;
import com.facebook.presto.orc.OrcWriterOptions;
import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.orc.metadata.CompressionKind;
import com.facebook.presto.orc.metadata.OrcType;
import com.facebook.presto.orc.metadata.statistics.ColumnStatistics;
Expand Down Expand Up @@ -77,7 +77,7 @@ public IcebergOrcFileWriter(
DateTimeZone hiveStorageTimeZone,
Optional<Supplier<OrcDataSource>> validationInputFactory,
OrcWriteValidation.OrcWriteValidationMode validationMode,
OrcWriterStats stats,
NoOpOrcWriterStats stats,
DwrfEncryptionProvider dwrfEncryptionProvider,
Optional<DwrfWriterEncryption> dwrfWriterEncryption)
{
Expand Down
10 changes: 0 additions & 10 deletions presto-orc/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,6 @@
<artifactId>aircompressor</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.airlift</groupId>
<artifactId>stats</artifactId>
</dependency>

<dependency>
<groupId>com.facebook.airlift</groupId>
<artifactId>configuration</artifactId>
Expand All @@ -76,11 +71,6 @@
<artifactId>jol-core</artifactId>
</dependency>

<dependency>
<groupId>org.weakref</groupId>
<artifactId>jmxutils</artifactId>
</dependency>

<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc;

import com.facebook.presto.orc.metadata.StripeInformation;

import static com.google.common.base.MoreObjects.toStringHelper;

public class NoOpOrcWriterStats
implements WriterStats
{
@Override
public void recordStripeWritten(
int stripeMinBytes,
int stripeMaxBytes,
int dictionaryMaxMemoryBytes,
FlushReason flushReason,
int dictionaryBytes,
StripeInformation stripeInformation)
{
}

@Override
public void updateSizeInBytes(long deltaInBytes)
{
}

@Override
public String toString()
{
return toStringHelper(this)
.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ private static TempFile writeOrcFile(boolean cacheEnabled, DwrfStripeCacheMode c
HIVE_STORAGE_TIME_ZONE,
true,
BOTH,
new OrcWriterStats());
new NoOpOrcWriterStats());

// write 4 stripes with 100 values each
int count = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ else if (typeSignature.startsWith("varchar")) {
}

// Use writeOrcColumnsPresto so that orcType and varchar length can be written in file footer
writeOrcColumnsPresto(orcFile, ORC_12, NONE, Optional.empty(), Collections.nCopies(channelCount, type), values, new OrcWriterStats());
writeOrcColumnsPresto(orcFile, ORC_12, NONE, Optional.empty(), Collections.nCopies(channelCount, type), values, new NoOpOrcWriterStats());
}

@TearDown
Expand Down
20 changes: 17 additions & 3 deletions presto-orc/src/test/java/com/facebook/presto/orc/OrcTester.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
Expand Down Expand Up @@ -616,7 +617,20 @@ private void assertRoundTrip(List<Type> writeTypes, List<Type> readTypes, List<L
assertEquals(writeTypes.size(), writeValues.size());
assertEquals(writeTypes.size(), readValues.size());

OrcWriterStats stats = new OrcWriterStats();
AtomicLong totalSize = new AtomicLong(0);
WriterStats stats = new WriterStats() {
@Override
public void recordStripeWritten(int stripeMinBytes, int stripeMaxBytes, int dictionaryMaxMemoryBytes, FlushReason flushReason, int dictionaryBytes, StripeInformation stripeInformation)
{
}

@Override
public void updateSizeInBytes(long deltaInBytes)
{
totalSize.getAndAdd(deltaInBytes);
}
};

for (Format format : formats) {
if (!readTypes.stream().allMatch(readType -> format.supportsType(readType))) {
return;
Expand Down Expand Up @@ -712,7 +726,7 @@ private void assertRoundTrip(List<Type> writeTypes, List<Type> readTypes, List<L
}
}

assertEquals(stats.getWriterSizeInBytes(), 0);
assertEquals(totalSize.get(), 0);
}

public static class OrcReaderSettings
Expand Down Expand Up @@ -1528,7 +1542,7 @@ static OrcReader createCustomOrcReader(
public static void writeOrcColumnPresto(File outputFile, Format format, CompressionKind compression, Type type, List<?> values)
throws Exception
{
writeOrcColumnsPresto(outputFile, format, compression, Optional.empty(), ImmutableList.of(type), ImmutableList.of(values), new OrcWriterStats());
writeOrcColumnsPresto(outputFile, format, compression, Optional.empty(), ImmutableList.of(type), ImmutableList.of(values), new NoOpOrcWriterStats());
}

public static void writeOrcColumnsPresto(File outputFile, Format format, CompressionKind compression, Optional<DwrfWriterEncryption> dwrfWriterEncryption, List<Type> types, List<List<?>> values, WriterStats stats)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ private static void testDecryptionRoundTrip(
throws Exception
{
try (TempFile tempFile = new TempFile()) {
writeOrcColumnsPresto(tempFile.getFile(), OrcTester.Format.DWRF, ZSTD, dwrfWriterEncryption, types, writtenValues, new OrcWriterStats());
writeOrcColumnsPresto(tempFile.getFile(), OrcTester.Format.DWRF, ZSTD, dwrfWriterEncryption, types, writtenValues, new NoOpOrcWriterStats());

assertFileContentsPresto(
types,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ private List<StripeFooter> testDictionary(Type type, OrcEncoding encoding, OrcWr
{
List<Type> types = ImmutableList.of(type);
try (TempFile tempFile = new TempFile()) {
OrcWriter writer = createOrcWriter(tempFile.getFile(), encoding, ZSTD, Optional.empty(), types, orcWriterOptions, new OrcWriterStats());
OrcWriter writer = createOrcWriter(tempFile.getFile(), encoding, ZSTD, Optional.empty(), types, orcWriterOptions, new NoOpOrcWriterStats());

int index = 0;
int batchId = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public void testEmptyStrings()

for (CompressionKind compression : compressions) {
TempFile tempFile = new TempFile();
writeOrcColumnsPresto(tempFile.getFile(), format, compression, Optional.empty(), types, values, new OrcWriterStats());
writeOrcColumnsPresto(tempFile.getFile(), format, compression, Optional.empty(), types, values, new NoOpOrcWriterStats());

OrcPredicate orcPredicate = createOrcPredicate(types, values, DWRF, false);
Map<Integer, Type> includedColumns = IntStream.range(0, types.size())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ private void testStreamOrder(OrcEncoding encoding, CompressionKind kind, Optiona
HIVE_STORAGE_TIME_ZONE,
true,
validationMode,
new OrcWriterStats());
new NoOpOrcWriterStats());

// write down some data with unsorted streams
String[] data = new String[] {"a", "bbbbb", "ccc", "dd", "eeee"};
Expand Down Expand Up @@ -209,7 +209,7 @@ public void testVerifyNoIllegalStateException()
HIVE_STORAGE_TIME_ZONE,
false,
null,
new OrcWriterStats());
new NoOpOrcWriterStats());

int entries = 65536;
BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ private void testMemoryTracking(CompressionKind compression, long lowerRetainedM
List<String> varcharDictionaryValues = newArrayList(limit(cycle(ImmutableList.of("apple", "apple pie", "apple\uD835\uDC03", "apple\uFFFD")), NUM_ROWS));
List<List<?>> values = ImmutableList.of(intValues, varcharDirectValues, varcharDictionaryValues);

writeOrcColumnsPresto(tempFile.getFile(), DWRF, compression, Optional.empty(), types, values, new OrcWriterStats());
writeOrcColumnsPresto(tempFile.getFile(), DWRF, compression, Optional.empty(), types, values, new NoOpOrcWriterStats());

OrcPredicate orcPredicate = createOrcPredicate(types, values, DWRF, false);
Map<Integer, Type> includedColumns = IntStream.range(0, types.size())
Expand Down Expand Up @@ -1015,7 +1015,7 @@ public void testOutputNotRequired()
List<String> varcharDirectValues = newArrayList(limit(cycle(ImmutableList.of("A", "B", "C")), NUM_ROWS));
List<List<?>> values = ImmutableList.of(varcharDirectValues, varcharDirectValues);

writeOrcColumnsPresto(tempFile.getFile(), DWRF, NONE, Optional.empty(), types, values, new OrcWriterStats());
writeOrcColumnsPresto(tempFile.getFile(), DWRF, NONE, Optional.empty(), types, values, new NoOpOrcWriterStats());

OrcPredicate orcPredicate = createOrcPredicate(types, values, DWRF, false);
Map<Subfield, TupleDomainFilter> filters = ImmutableMap.of(new Subfield("c"), stringIn(true, "A", "B", "C")); //ImmutableMap.of(1, stringIn(true, "10", "11"));
Expand Down Expand Up @@ -1094,7 +1094,7 @@ public void testAdaptiveBatchSizes()
}
}

writeOrcColumnsPresto(tempFile.getFile(), DWRF, NONE, Optional.empty(), types, ImmutableList.of(values), new OrcWriterStats());
writeOrcColumnsPresto(tempFile.getFile(), DWRF, NONE, Optional.empty(), types, ImmutableList.of(values), new NoOpOrcWriterStats());

try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile, OrcEncoding.DWRF, OrcPredicate.TRUE, type, MAX_BATCH_SIZE, false, false)) {
assertEquals(recordReader.getFileRowCount(), rowCount);
Expand Down Expand Up @@ -1131,7 +1131,7 @@ public void testHiddenConstantColumns()
List<List<?>> values = ImmutableList.of(ImmutableList.of(1L, 2L));

TempFile tempFile = new TempFile();
writeOrcColumnsPresto(tempFile.getFile(), DWRF, ZSTD, Optional.empty(), types, values, new OrcWriterStats());
writeOrcColumnsPresto(tempFile.getFile(), DWRF, ZSTD, Optional.empty(), types, values, new NoOpOrcWriterStats());

// Hidden columns like partition columns use negative indices (-13).
int hiddenColumnIndex = -13;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ private void write(TempFile tempFile, Type writerType, List<String> data)
HIVE_STORAGE_TIME_ZONE,
true,
BOTH,
new OrcWriterStats());
new NoOpOrcWriterStats());

// write down some data with unsorted streams
Block[] fieldBlocks = new Block[data.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
import com.facebook.presto.orc.DefaultOrcWriterFlushPolicy;
import com.facebook.presto.orc.DwrfEncryptionInfo;
import com.facebook.presto.orc.FileOrcDataSource;
import com.facebook.presto.orc.NoOpOrcWriterStats;
import com.facebook.presto.orc.OrcEncoding;
import com.facebook.presto.orc.OrcTester;
import com.facebook.presto.orc.OrcWriter;
import com.facebook.presto.orc.OrcWriterOptions;
import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.orc.TempFile;
import com.facebook.presto.orc.metadata.Footer;
import com.facebook.presto.orc.metadata.OrcType;
Expand Down Expand Up @@ -310,7 +310,7 @@ public void testFileMetadataRawSize()

for (OrcEncoding encoding : OrcEncoding.values()) {
try (TempFile tempFile = new TempFile()) {
OrcWriter writer = createOrcWriter(tempFile.getFile(), encoding, ZSTD, Optional.empty(), types, writerOptions, new OrcWriterStats());
OrcWriter writer = createOrcWriter(tempFile.getFile(), encoding, ZSTD, Optional.empty(), types, writerOptions, new NoOpOrcWriterStats());
for (int i = 0; i < numBlocksPerFile; i++) {
writer.write(new Page(blocks));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HiveFileContext;
import com.facebook.presto.orc.DwrfKeyProvider;
import com.facebook.presto.orc.NoOpOrcWriterStats;
import com.facebook.presto.orc.OrcAggregatedMemoryContext;
import com.facebook.presto.orc.OrcBatchRecordReader;
import com.facebook.presto.orc.OrcDataSource;
import com.facebook.presto.orc.OrcPredicate;
import com.facebook.presto.orc.OrcReader;
import com.facebook.presto.orc.OrcReaderOptions;
import com.facebook.presto.orc.OrcWriterStats;
import com.facebook.presto.orc.StorageStripeMetadataSource;
import com.facebook.presto.orc.StripeMetadataSourceFactory;
import com.facebook.presto.orc.TupleDomainOrcPredicate;
Expand Down Expand Up @@ -167,7 +167,7 @@ public class OrcStorageManager
private final ExecutorService commitExecutor;
private final OrcDataEnvironment orcDataEnvironment;
private final OrcFileRewriter fileRewriter;
private final OrcWriterStats stats = new OrcWriterStats();
private final NoOpOrcWriterStats stats = new NoOpOrcWriterStats();
private final OrcFileTailSource orcFileTailSource;
private final StripeMetadataSourceFactory stripeMetadataSourceFactory;

Expand Down
Loading

0 comments on commit c5c0866

Please sign in to comment.