Skip to content

Commit

Permalink
Avoid footer stats when not reliable
Browse files Browse the repository at this point in the history
For partitions that go through mutation via commits, the footer stats
may not be reliable to use for optimization. As part of this commit, we
provide an option to avoid using footer stats.
Avoid footer stats for metalake partition

For metalake partitions, the footer stats are not reliable. As part of
this commit, we avoid using footer stats if a particular split
has undergone mutation.
  • Loading branch information
abhiseksaikia authored and ARUNACHALAM THIRUPATHI committed Nov 17, 2022
1 parent 3fc6273 commit 71b3911
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,8 @@ private static Optional<ConnectorPageSource> createSelectivePageSource(
split.getFileSplit().getFileModifiedTime(),
HiveSessionProperties.isVerboseRuntimeStatsEnabled(session)),
encryptionInformation,
layout.isAppendRowNumberEnabled());
layout.isAppendRowNumberEnabled(),
layout.isFooterStatsUnreliable());
if (pageSource.isPresent()) {
return Optional.of(pageSource.get());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,6 @@ Optional<? extends ConnectorPageSource> createPageSource(
DateTimeZone hiveStorageTimeZone,
HiveFileContext hiveFileContext,
Optional<EncryptionInformation> encryptionInformation,
boolean appendRowNumberEnabled);
boolean appendRowNumberEnabled,
boolean footerStatsUnreliable);
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public class HiveTableLayoutHandle
private final Optional<Set<HiveColumnHandle>> requestedColumns;
private final boolean partialAggregationsPushedDown;
private final boolean appendRowNumberEnabled;
private final boolean footerStatsUnreliable;

// coordinator-only properties
private final Optional<List<HivePartition>> partitions;
Expand All @@ -86,7 +87,8 @@ public HiveTableLayoutHandle(
@JsonProperty("layoutString") String layoutString,
@JsonProperty("requestedColumns") Optional<Set<HiveColumnHandle>> requestedColumns,
@JsonProperty("partialAggregationsPushedDown") boolean partialAggregationsPushedDown,
@JsonProperty("appendRowNumber") boolean appendRowNumberEnabled)
@JsonProperty("appendRowNumber") boolean appendRowNumberEnabled,
@JsonProperty("footerStatsUnreliable") boolean footerStatsUnreliable)
{
this(
schemaTableName,
Expand All @@ -105,7 +107,8 @@ public HiveTableLayoutHandle(
requestedColumns,
partialAggregationsPushedDown,
appendRowNumberEnabled,
Optional.empty());
Optional.empty(),
footerStatsUnreliable);
}

protected HiveTableLayoutHandle(
Expand All @@ -125,7 +128,8 @@ protected HiveTableLayoutHandle(
Optional<Set<HiveColumnHandle>> requestedColumns,
boolean partialAggregationsPushedDown,
boolean appendRowNumberEnabled,
Optional<List<HivePartition>> partitions)
Optional<List<HivePartition>> partitions,
boolean footerStatsUnreliable)
{
this.schemaTableName = requireNonNull(schemaTableName, "table is null");
this.tablePath = requireNonNull(tablePath, "tablePath is null");
Expand All @@ -144,6 +148,7 @@ protected HiveTableLayoutHandle(
this.partialAggregationsPushedDown = partialAggregationsPushedDown;
this.appendRowNumberEnabled = appendRowNumberEnabled;
this.partitions = requireNonNull(partitions, "partitions is null");
this.footerStatsUnreliable = footerStatsUnreliable;
}

@JsonProperty
Expand Down Expand Up @@ -259,6 +264,12 @@ public boolean isAppendRowNumberEnabled()
return appendRowNumberEnabled;
}

/**
 * Returns the {@code footerStatsUnreliable} flag carried by this layout handle.
 * The flag is exposed as a JSON property alongside the handle's other fields.
 *
 * NOTE(review): in the ORC page source hunk of this change, a {@code true}
 * value skips the aggregated (footer-backed) page source; confirm that other
 * readers honor the flag the same way.
 *
 * @return {@code true} if footer stats should not be relied upon
 */
@JsonProperty
public boolean isFooterStatsUnreliable()
{
return footerStatsUnreliable;
}

@Override
public Object getIdentifier(Optional<ConnectorSplit> split, PlanCanonicalizationStrategy canonicalizationStrategy)
{
Expand Down Expand Up @@ -356,7 +367,8 @@ public Builder builder()
.setRequestedColumns(getRequestedColumns())
.setPartialAggregationsPushedDown(isPartialAggregationsPushedDown())
.setAppendRowNumberEnabled(isAppendRowNumberEnabled())
.setPartitions(getPartitions());
.setPartitions(getPartitions())
.setFooterStatsUnreliable(isFooterStatsUnreliable());
}

public static class Builder
Expand All @@ -377,6 +389,7 @@ public static class Builder
private Optional<Set<HiveColumnHandle>> requestedColumns;
private boolean partialAggregationsPushedDown;
private boolean appendRowNumberEnabled;
private boolean footerStatsUnreliable;

private Optional<List<HivePartition>> partitions;

Expand Down Expand Up @@ -489,6 +502,12 @@ public Builder setPartitions(Optional<List<HivePartition>> partitions)
return this;
}

/**
 * Records the {@code footerStatsUnreliable} flag on this builder.
 *
 * @param footerStatsUnreliable the flag value to store on the builder
 * @return this builder, so calls can be chained
 */
public Builder setFooterStatsUnreliable(boolean footerStatsUnreliable)
{
this.footerStatsUnreliable = footerStatsUnreliable;
return this;
}

public HiveTableLayoutHandle build()
{
return new HiveTableLayoutHandle(
Expand All @@ -508,7 +527,8 @@ public HiveTableLayoutHandle build()
requestedColumns,
partialAggregationsPushedDown,
appendRowNumberEnabled,
partitions);
partitions,
footerStatsUnreliable);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ public Optional<? extends ConnectorPageSource> createPageSource(
DateTimeZone hiveStorageTimeZone,
HiveFileContext hiveFileContext,
Optional<EncryptionInformation> encryptionInformation,
boolean appendRowNumberEnabled)
boolean appendRowNumberEnabled,
boolean footerStatsUnreliable)
{
if (!OrcSerde.class.getName().equals(storage.getStorageFormat().getSerDe())) {
return Optional.empty();
Expand Down Expand Up @@ -144,6 +145,7 @@ public Optional<? extends ConnectorPageSource> createPageSource(
tupleDomainFilterCache,
encryptionInformation,
dwrfEncryptionProvider,
appendRowNumberEnabled));
appendRowNumberEnabled,
footerStatsUnreliable));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ public Optional<? extends ConnectorPageSource> createPageSource(
DateTimeZone hiveStorageTimeZone,
HiveFileContext hiveFileContext,
Optional<EncryptionInformation> encryptionInformation,
boolean appendRowNumberEnabled)
boolean appendRowNumberEnabled,
boolean footerStatsUnreliable)
{
if (!OrcSerde.class.getName().equals(storage.getStorageFormat().getSerDe())) {
return Optional.empty();
Expand Down Expand Up @@ -251,7 +252,8 @@ public Optional<? extends ConnectorPageSource> createPageSource(
tupleDomainFilterCache,
encryptionInformation,
NO_ENCRYPTION,
appendRowNumberEnabled));
appendRowNumberEnabled,
footerStatsUnreliable));
}

public static ConnectorPageSource createOrcPageSource(
Expand Down Expand Up @@ -281,7 +283,8 @@ public static ConnectorPageSource createOrcPageSource(
TupleDomainFilterCache tupleDomainFilterCache,
Optional<EncryptionInformation> encryptionInformation,
DwrfEncryptionProvider dwrfEncryptionProvider,
boolean appendRowNumberEnabled)
boolean appendRowNumberEnabled,
boolean footerStatsUnreliable)
{
checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");

Expand Down Expand Up @@ -340,7 +343,7 @@ public static ConnectorPageSource createOrcPageSource(

List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader.getTypes(), path);

if (!physicalColumns.isEmpty() && physicalColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
if (!footerStatsUnreliable && !physicalColumns.isEmpty() && physicalColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
return new AggregatedOrcPageSource(physicalColumns, reader.getFooter(), typeManager, functionResolution);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ public Optional<? extends ConnectorPageSource> createPageSource(
DateTimeZone hiveStorageTimeZone,
HiveFileContext hiveFileContext,
Optional<EncryptionInformation> encryptionInformation,
boolean appendRowNumberEnabled)
boolean appendRowNumberEnabled,
boolean footerStatsUnreliable)
{
if (!PARQUET_SERDE_CLASS_NAMES.contains(storage.getStorageFormat().getSerDe())) {
return Optional.empty();
Expand Down

0 comments on commit 71b3911

Please sign in to comment.