Skip to content

Commit

Permalink
Add a metastore interface to control add partition commit batch size
Browse files Browse the repository at this point in the history
Reference to [HMS Repair Utility](shorturl.at/eHIXY), the size for adding partitions
can be in a range between 1 and 2,147,483,647.
For [Glue metastore](shorturl.at/pqDH9), it’s bounded by 100 for write access.
  • Loading branch information
fgwang7w authored and yingsu00 committed Jan 7, 2023
1 parent bb88317 commit 4fdd16e
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,11 @@ default List<TableConstraint<String>> getTableConstraints(MetastoreContext metas
{
return ImmutableList.of();
}

// Different metastore systems could implement this commit batch size differently based on different underlying database capacity.
// Default batch partition commit size is set to 10.
default int getPartitionCommitBatchSize()
{
return 10;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@
public class SemiTransactionalHiveMetastore
{
private static final Logger log = Logger.get(SemiTransactionalHiveMetastore.class);
private static final int PARTITION_COMMIT_BATCH_SIZE = 8;
private static final int MAX_LAST_DATA_COMMIT_TIME_ENTRY_PER_TABLE = 100;
private static final int MAX_LAST_DATA_COMMIT_TIME_ENTRY_PER_TRANSACTION = 10_000;

Expand Down Expand Up @@ -1513,7 +1512,7 @@ private void prepareAddPartition(MetastoreContext metastoreContext, HdfsContext

PartitionAdder partitionAdder = partitionAdders.computeIfAbsent(
partition.getSchemaTableName(),
ignored -> new PartitionAdder(metastoreContext, partition.getDatabaseName(), partition.getTableName(), delegate, PARTITION_COMMIT_BATCH_SIZE));
ignored -> new PartitionAdder(metastoreContext, partition.getDatabaseName(), partition.getTableName(), delegate));

// we can bypass the file storage path checking logic for sync partition code path
// because the file paths have been verified during early phase of the sync logic already
Expand Down Expand Up @@ -2961,13 +2960,13 @@ private static class PartitionAdder
private List<List<String>> createdPartitionValues = new ArrayList<>();
private List<MetastoreOperationResult> operationResults;

public PartitionAdder(MetastoreContext metastoreContext, String schemaName, String tableName, ExtendedHiveMetastore metastore, int batchSize)
public PartitionAdder(MetastoreContext metastoreContext, String schemaName, String tableName, ExtendedHiveMetastore metastore)
{
this.metastoreContext = requireNonNull(metastoreContext, "metastoreContext is null");
this.schemaName = schemaName;
this.tableName = tableName;
this.metastore = metastore;
this.batchSize = batchSize;
this.batchSize = metastore.getPartitionCommitBatchSize();
this.partitions = new ArrayList<>(batchSize);
this.operationResults = new ArrayList<>();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,24 @@ public class GlueHiveMetastore
private static final String PUBLIC_ROLE_NAME = "public";
private static final String DEFAULT_METASTORE_USER = "presto";
private static final String WILDCARD_EXPRESSION = "";

// This is the total number of partitions allowed to process in a big batch chunk which splits multiple smaller batch of partitions allowed by BATCH_CREATE_PARTITION_MAX_PAGE_SIZE
// Here's an example diagram on how async batches are handled for Create Partition:
// |--------BATCH_CREATE_PARTITION_MAX_PAGE_SIZE------------| ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// | p0, p1, p2 ..................................... p99 |
// |--------------------------------------------------------|
// | p0, p1, p2 ..................................... p99 |
// |--------------------------------------------------------|
// BATCH_PARTITION_COMMIT_TOTAL_SIZE / BATCH_CREATE_PARTITION_MAX_PAGE_SIZE ..... (10k/100=100 batches)
// |--------------------------------------------------------|++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// | p0, p1, p2 ..................................... p99 |
// |--------------------------------------------------------|
// | p0, p1, p2 ..................................... p99 |
// |--------------------------------------------------------|.......... (100 batches)
// |--------------------------------------------------------|++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
private static final int BATCH_PARTITION_COMMIT_TOTAL_SIZE = 10000;
private static final int BATCH_GET_PARTITION_MAX_PAGE_SIZE = 1000;
// this is the total number of partitions allowed per batch that glue metastore can process to create partitions
private static final int BATCH_CREATE_PARTITION_MAX_PAGE_SIZE = 100;
private static final int AWS_GLUE_GET_PARTITIONS_MAX_RESULTS = 1000;
private static final Comparator<Partition> PARTITION_COMPARATOR = comparing(Partition::getValues, lexicographical(String.CASE_INSENSITIVE_ORDER));
Expand Down Expand Up @@ -226,6 +243,14 @@ public GlueMetastoreStats getStats()
return stats;
}

// For Glue metastore there's an upper bound limit on 100 partitions per batch.
// Here's the reference: https://docs.aws.amazon.com/glue/latest/webapi/API_BatchCreatePartition.html
@Override
public int getPartitionCommitBatchSize()
{
return BATCH_PARTITION_COMMIT_TOTAL_SIZE;
}

@Override
public Optional<Database> getDatabase(MetastoreContext metastoreContext, String databaseName)
{
Expand Down

0 comments on commit 4fdd16e

Please sign in to comment.