Use dynamic split sizes in hive connector #22051
HiveSplitManager.java
@@ -21,6 +21,7 @@
 import com.facebook.presto.common.predicate.SortedRangeSet;
 import com.facebook.presto.common.predicate.TupleDomain;
 import com.facebook.presto.common.predicate.ValueSet;
+import com.facebook.presto.common.type.FixedWidthType;
 import com.facebook.presto.common.type.Type;
 import com.facebook.presto.hive.HiveBucketing.HiveBucketFilter;
 import com.facebook.presto.hive.metastore.Column;
@@ -46,6 +47,8 @@
 import com.facebook.presto.spi.WarningCollector;
 import com.facebook.presto.spi.connector.ConnectorSplitManager;
 import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
+import com.facebook.presto.spi.statistics.ColumnStatistics;
+import com.facebook.presto.spi.statistics.TableStatistics;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.AbstractIterator;
 import com.google.common.collect.ImmutableList;
@@ -89,6 +92,7 @@
 import static com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID;
 import static com.facebook.presto.hive.HiveSessionProperties.getHiveMaxInitialSplitSize;
 import static com.facebook.presto.hive.HiveSessionProperties.getLeaseDuration;
+import static com.facebook.presto.hive.HiveSessionProperties.isDynamicSplitSizesEnabled;
 import static com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled;
 import static com.facebook.presto.hive.HiveSessionProperties.isPartitionStatisticsBasedOptimizationEnabled;
 import static com.facebook.presto.hive.HiveStorageFormat.PARQUET;
@@ -113,11 +117,14 @@
 import static com.google.common.collect.Iterables.concat;
 import static com.google.common.collect.Iterables.getOnlyElement;
 import static com.google.common.collect.Iterables.transform;
+import static java.lang.Double.isFinite;
 import static java.lang.Float.floatToIntBits;
+import static java.lang.Math.max;
 import static java.lang.Math.min;
 import static java.lang.String.format;
 import static java.util.Locale.ENGLISH;
 import static java.util.Objects.requireNonNull;
+import static java.util.function.Function.identity;
 import static java.util.stream.Collectors.groupingBy;
 import static java.util.stream.Collectors.reducing;
 import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE;
@@ -281,6 +288,8 @@ public ConnectorSplitSource getSplits(
                 layout.getPredicateColumns(),
                 layout.getDomainPredicate().getDomains());

+        double ratio = getSplitScanRatio(session, tableName, layout, metadata, partitions);
+
         HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
                 table,
                 hivePartitions,
@@ -296,16 +305,63 @@ public ConnectorSplitSource getSplits(
                 splitSchedulingContext.schedulerUsesHostAddresses(),
                 layout.isPartialAggregationsPushedDown());

-        HiveSplitSource splitSource = computeSplitSource(splitSchedulingContext, table, session, hiveSplitLoader);
+        HiveSplitSource splitSource = computeSplitSource(splitSchedulingContext, table, session, hiveSplitLoader, ratio);
         hiveSplitLoader.start(splitSource);

         return splitSource;
     }

+    // Get the estimated split scan ratio: (data read by the operator) / (data present in the split).
+    // This ratio is used to increase split sizes when the data read by the operator is much smaller.
+    private double getSplitScanRatio(
+            ConnectorSession session,
+            SchemaTableName tableName,
+            HiveTableLayoutHandle layout,
+            TransactionalMetadata metadata,
+            List<HivePartition> partitions)
+    {
+        if (!isDynamicSplitSizesEnabled(session)) {
+            return 1.0;
+        }
+        HiveTableHandle hiveTableHandle = new HiveTableHandle(tableName.getSchemaName(), tableName.getTableName());

Review comment: Use the function mergeRequestedAndPredicateColumns(). It will also handle the case where the same struct column appears in both sets with different subfields.

+        Set<HiveColumnHandle> readColumnHandles = mergeRequestedAndPredicateColumns(
+                layout.getRequestedColumns(),
+                layout.getPredicateColumns().values().stream().collect(toImmutableSet())
+        ).orElseGet(ImmutableSet::of);

+        Map<String, Type> readColumnTypes = readColumnHandles.stream().collect(toImmutableMap(HiveColumnHandle::getName, handle -> metadata.getColumnMetadata(session, hiveTableHandle, handle).getType()));
+        Map<String, ColumnHandle> readColumns = readColumnHandles.stream().collect(toImmutableMap(HiveColumnHandle::getName, identity()));

+        TableStatistics tableStatistics = metadata.getHiveStatisticsProvider().getTableStatistics(session, tableName, readColumns, readColumnTypes, partitions);
+        double totalSize = tableStatistics.getTotalSize().getValue();
+        double readSize = 0;
+        double rowCount = tableStatistics.getRowCount().getValue();
+        for (Map.Entry<ColumnHandle, ColumnStatistics> entry : tableStatistics.getColumnStatistics().entrySet()) {
+            double value = entry.getValue().getDataSize().getValue();
+            // We do not compute column stats for fixed-width types, so count them manually.
+            if (!isFinite(value) && isFinite(rowCount)) {
+                HiveColumnHandle columnHandle = (HiveColumnHandle) entry.getKey();
+                Type type = metadata.getColumnMetadata(session, hiveTableHandle, columnHandle).getType();
+                if (type instanceof FixedWidthType) {
+                    int size = ((FixedWidthType) type).getFixedSize();
+                    value = size * rowCount;
+                }
+            }
+            readSize += value;
+        }

+        if (totalSize > 0 && isFinite(totalSize) && isFinite(readSize)) {
+            return readSize / totalSize;
+        }
+        return 1.0;
+    }
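For intuition, here is a minimal, self-contained sketch of the estimate computed above (illustrative names only, not Presto APIs). Assume a table whose total size is 1,000MB with 10 million rows, and a query that reads one BIGINT column (fixed width, so it has no data-size stat) and one VARCHAR column whose data-size stat is 120MB; the ratio comes out to 0.2.

// Standalone sketch of the split scan ratio estimate; all names are illustrative.
public final class SplitScanRatioExample
{
    private SplitScanRatioExample() {}

    static double splitScanRatio(double totalSizeBytes, double rowCount, double[] columnDataSizes, int[] fixedWidthBytes)
    {
        double readSize = 0;
        for (int i = 0; i < columnDataSizes.length; i++) {
            double value = columnDataSizes[i];
            // Fixed-width fallback: no data-size stat, so estimate width * rowCount.
            if (!Double.isFinite(value) && Double.isFinite(rowCount) && fixedWidthBytes[i] > 0) {
                value = fixedWidthBytes[i] * rowCount;
            }
            readSize += value;
        }
        if (totalSizeBytes > 0 && Double.isFinite(totalSizeBytes) && Double.isFinite(readSize)) {
            return readSize / totalSizeBytes;
        }
        return 1.0;
    }

    public static void main(String[] args)
    {
        double totalSize = 1_000e6;               // 1,000MB table
        double rowCount = 10_000_000;             // 10M rows
        double[] dataSizes = {Double.NaN, 120e6}; // BIGINT: no stat; VARCHAR: 120MB
        int[] widths = {8, 0};                    // BIGINT is 8 bytes; VARCHAR is variable width
        // readSize = 8 * 10M + 120MB = 200MB, so the ratio is 200MB / 1,000MB = 0.2
        System.out.println(splitScanRatio(totalSize, rowCount, dataSizes, widths));
    }
}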

     private HiveSplitSource computeSplitSource(SplitSchedulingContext splitSchedulingContext,
             Table table,
             ConnectorSession session,
-            HiveSplitLoader hiveSplitLoader)
+            HiveSplitLoader hiveSplitLoader,
+            double splitScanRatio)
     {
         HiveSplitSource splitSource;
         CacheQuotaRequirement cacheQuotaRequirement = cacheQuotaRequirementProvider.getCacheQuotaRequirement(table.getDatabaseName(), table.getTableName());
@@ -321,7 +377,8 @@ private HiveSplitSource computeSplitSource(SplitSchedulingContext splitSchedulingContext,
                         maxOutstandingSplitsSize,
                         hiveSplitLoader,
                         executor,
-                        new CounterStat());
+                        new CounterStat(),
+                        splitScanRatio);
                 break;
             case GROUPED_SCHEDULING:
                 splitSource = HiveSplitSource.bucketed(
@@ -334,7 +391,8 @@ private HiveSplitSource computeSplitSource(SplitSchedulingContext splitSchedulingContext,
                         maxOutstandingSplitsSize,
                         hiveSplitLoader,
                         executor,
-                        new CounterStat());
+                        new CounterStat(),
+                        splitScanRatio);
                 break;
             case REWINDABLE_GROUPED_SCHEDULING:
                 splitSource = HiveSplitSource.bucketedRewindable(
@@ -346,7 +404,8 @@ private HiveSplitSource computeSplitSource(SplitSchedulingContext splitSchedulingContext,
                         maxOutstandingSplitsSize,
                         hiveSplitLoader,
                         executor,
-                        new CounterStat());
+                        new CounterStat(),
+                        splitScanRatio);
                 break;
             default:
                 throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingContext.getSplitSchedulingStrategy());
@@ -909,7 +968,7 @@ static Optional<Set<HiveColumnHandle>> mergeRequestedAndPredicateColumns(Optiona
     static boolean isBucketCountCompatible(int tableBucketCount, int partitionBucketCount)
     {
         checkArgument(tableBucketCount > 0 && partitionBucketCount > 0);
-        int larger = Math.max(tableBucketCount, partitionBucketCount);
+        int larger = max(tableBucketCount, partitionBucketCount);
         int smaller = Math.min(tableBucketCount, partitionBucketCount);
         if (larger % smaller != 0) {
             // must be evenly divisible
HiveSplitSource.java
@@ -69,6 +69,8 @@
 import static com.google.common.util.concurrent.Futures.immediateFuture;
 import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
 import static io.airlift.units.DataSize.succinctBytes;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
 import static java.lang.String.format;
 import static java.util.Objects.requireNonNull;
@@ -98,6 +100,7 @@ class HiveSplitSource
     private final CounterStat highMemorySplitSourceCounter;
     private final AtomicBoolean loggedHighMemoryWarning = new AtomicBoolean();
     private final HiveSplitWeightProvider splitWeightProvider;
+    private final double splitScanRatio;

     private HiveSplitSource(
             ConnectorSession session,
@@ -109,7 +112,8 @@ private HiveSplitSource(
             DataSize maxOutstandingSplitsSize,
             HiveSplitLoader splitLoader,
             CounterStat highMemorySplitSourceCounter,
-            boolean useRewindableSplitSource)
+            boolean useRewindableSplitSource,
+            double splitScanRatio)
     {
         requireNonNull(session, "session is null");
         this.queryId = session.getQueryId();
@@ -126,6 +130,16 @@ private HiveSplitSource(
         this.useRewindableSplitSource = useRewindableSplitSource;
         this.remainingInitialSplits = new AtomicInteger(maxInitialSplits);
         this.splitWeightProvider = isSizeBasedSplitWeightsEnabled(session) ? new SizeBasedSplitWeightProvider(getMinimumAssignedSplitWeight(session), maxSplitSize) : HiveSplitWeightProvider.uniformStandardWeightProvider();
+        // Clamp the value to [0.1, 1.0].
+        // This ratio will be used to increase split sizes. The range implies:
+        // 1) we do not increase split sizes by more than 10x (>= 0.1), and

Review comment: Curious, why this limit? It would be helpful to update the commit message to provide more details about the implementation and these kinds of limits.

Review comment: Can you answer this question, @pranjalssh?

+        // 2) we do not decrease split sizes (<= 1.0).
+        // We schedule splits only up to 10x larger, to be conservative and avoid scheduling splits with too many rows.
+        // For the default size of 64MB, this keeps the split sizes sent under 1GB; files are usually smaller than that.
+        if (!Double.isFinite(splitScanRatio)) {
+            splitScanRatio = 1.0;
+        }
+        this.splitScanRatio = max(min(splitScanRatio, 1.0), 0.1);
     }

     public static HiveSplitSource allAtOnce(
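To make the bound concrete, here is a small standalone sketch of the clamp and the resulting effective maximum split size (illustrative code; the 64MB default is the one mentioned in the comment above):

// Standalone sketch: clamping splitScanRatio and its effect on the max split size.
public final class SplitScanRatioClampExample
{
    private SplitScanRatioClampExample() {}

    static double clamp(double splitScanRatio)
    {
        if (!Double.isFinite(splitScanRatio)) {
            return 1.0; // no usable stats: leave split sizes unchanged
        }
        return Math.max(Math.min(splitScanRatio, 1.0), 0.1);
    }

    public static void main(String[] args)
    {
        long maxSplitBytes = 64L << 20; // 64MB default
        for (double ratio : new double[] {0.02, 0.1, 0.5, 1.0, 7.0, Double.NaN}) {
            long effective = (long) (maxSplitBytes / clamp(ratio));
            // 0.02 clamps to 0.1 -> 640MB (the 10x ceiling); 7.0 clamps to 1.0 -> 64MB (never smaller)
            System.out.printf("ratio=%s -> maxSplitBytes=%dMB%n", ratio, effective >> 20);
        }
    }
}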
@@ -138,7 +152,8 @@ public static HiveSplitSource allAtOnce(
             DataSize maxOutstandingSplitsSize,
             HiveSplitLoader splitLoader,
             Executor executor,
-            CounterStat highMemorySplitSourceCounter)
+            CounterStat highMemorySplitSourceCounter,
+            double splitScanRatio)
     {
         return new HiveSplitSource(
                 session,
@@ -186,7 +201,8 @@ public int rewind(OptionalInt bucketNumber)
                 maxOutstandingSplitsSize,
                 splitLoader,
                 highMemorySplitSourceCounter,
-                false);
+                false,
+                splitScanRatio);
     }

     public static HiveSplitSource bucketed(
@@ -199,7 +215,8 @@ public static HiveSplitSource bucketed(
             DataSize maxOutstandingSplitsSize,
             HiveSplitLoader splitLoader,
             Executor executor,
-            CounterStat highMemorySplitSourceCounter)
+            CounterStat highMemorySplitSourceCounter,
+            double splitScanRatio)
     {
         return new HiveSplitSource(
                 session,
@@ -267,7 +284,8 @@ private AsyncQueue<InternalHiveSplit> queueFor(OptionalInt bucketNumber)
                 maxOutstandingSplitsSize,
                 splitLoader,
                 highMemorySplitSourceCounter,
-                false);
+                false,
+                splitScanRatio);
     }

     public static HiveSplitSource bucketedRewindable(
@@ -279,7 +297,8 @@ public static HiveSplitSource bucketedRewindable(
             DataSize maxOutstandingSplitsSize,
             HiveSplitLoader splitLoader,
             Executor executor,
-            CounterStat highMemorySplitSourceCounter)
+            CounterStat highMemorySplitSourceCounter,
+            double splitScanRatio)
     {
         return new HiveSplitSource(
                 session,
@@ -359,7 +378,8 @@ public int decrementAndGetPartitionReferences(InternalHiveSplit split)
                 maxOutstandingSplitsSize,
                 splitLoader,
                 highMemorySplitSourceCounter,
-                true);
+                true,
+                splitScanRatio);
     }

     /**
@@ -469,6 +489,8 @@ public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle
                 maxSplitBytes = maxInitialSplitSize.toBytes();
             }
         }
+        // Increase the split size if the bytes scanned per split are expected to be smaller.
+        maxSplitBytes = (long) (maxSplitBytes / splitScanRatio);
         InternalHiveBlock block = internalSplit.currentBlock();
         long splitBytes;
         if (internalSplit.isSplittable()) {
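To illustrate the intended effect (keeping the bytes actually scanned per split roughly constant), consider a hypothetical 640MB file read with the default 64MB max split size and a clamped splitScanRatio of 0.1:

// Standalone illustration (hypothetical numbers): dynamic sizing keeps the
// *scanned* bytes per split near the configured target instead of 10x below it.
public final class DynamicSplitSizeEffect
{
    private DynamicSplitSizeEffect() {}

    public static void main(String[] args)
    {
        long fileBytes = 640L << 20;      // hypothetical 640MB file
        long maxSplitBytes = 64L << 20;   // 64MB default max split size
        double splitScanRatio = 0.1;      // only ~10% of each split's bytes are read

        long fixedSplits = fileBytes / maxSplitBytes;                    // 10 splits
        long grownSplitBytes = (long) (maxSplitBytes / splitScanRatio);  // 640MB splits
        long dynamicSplits = Math.max(1, fileBytes / grownSplitBytes);   // 1 split

        System.out.printf("fixed:   %d splits, ~%.0fMB scanned per split%n",
                fixedSplits, splitScanRatio * (maxSplitBytes >> 20));     // ~6MB each
        System.out.printf("dynamic: %d splits, ~%.0fMB scanned per split%n",
                dynamicSplits, splitScanRatio * (grownSplitBytes >> 20)); // ~64MB each
    }
}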
Review comment: Can you clarify what splitScanRatio means? The name is confusing to me. It looks like it is the proportion of bytes we expect to read (based on the column stats of the columns we actually use) compared to the total data size.

Reply: Done
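Restating the thread above as formulas (notation mine; it mirrors getSplitScanRatio and the getNextBatch adjustment):

\[
\text{splitScanRatio} \approx \frac{\sum_{c \,\in\, \text{columns read}} \text{dataSize}(c)}{\text{totalSize}},
\qquad
\text{maxSplitBytes}' = \frac{\text{maxSplitBytes}}{\max(\min(\text{splitScanRatio},\, 1.0),\, 0.1)}
\]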