Skip to content

Commit 348e991

Browse files
hantangwangdyingsu00
authored and committed
Support HDFS and S3 path as warehouse directory for Hive file metastore
1 parent 9d46add commit 348e991

File tree

5 files changed

+55
-48
lines changed

5 files changed

+55
-48
lines changed

presto-docs/src/main/sphinx/installation/deployment.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ Run the Presto server:
421421
File-Based Metastore
422422
--------------------
423423

424-
For testing or development purposes, Presto can be configured to use a local
424+
For testing or development purposes, Presto can be configured to use a HDFS, S3, or local
425425
filesystem directory as a Hive Metastore.
426426

427427
The file-based metastore works only with the following connectors:
@@ -444,8 +444,9 @@ Configuring a File-Based Metastore
444444
hive.metastore=file
445445
hive.metastore.catalog.dir=file:///<catalog-dir>
446446
447-
Replace ``<catalog-dir>`` in the example with the path to a directory on an
448-
accessible filesystem.
447+
Replace ``file:///<catalog-dir>`` in the example with the path to a directory on an
448+
accessible filesystem. For example, use ``hdfs://<host:port>/<catalog-dir>`` on HDFS
449+
or ``s3://<bucket>/<catalog-dir>`` on an Object Storage System.
449450

450451
Using a File-Based Warehouse
451452
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java

+19-22
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@
6868
import javax.annotation.concurrent.ThreadSafe;
6969
import javax.inject.Inject;
7070

71-
import java.io.File;
7271
import java.io.IOException;
7372
import java.io.OutputStream;
7473
import java.util.ArrayDeque;
@@ -266,9 +265,19 @@ else if (table.getTableType().equals(EXTERNAL_TABLE)) {
266265
}
267266

268267
if (!table.getTableType().equals(VIRTUAL_VIEW)) {
269-
File location = new File(new Path(table.getStorage().getLocation()).toUri());
270-
checkArgument(location.isDirectory(), "Table location is not a directory: %s", location);
271-
checkArgument(location.exists(), "Table directory does not exist: %s", location);
268+
try {
269+
Path tableLocation = new Path(table.getStorage().getLocation());
270+
FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, tableLocation);
271+
if (!fileSystem.isDirectory(tableLocation)) {
272+
throw new PrestoException(HIVE_METASTORE_ERROR, "Table location is not a directory: " + tableLocation);
273+
}
274+
if (!fileSystem.exists(tableLocation)) {
275+
throw new PrestoException(HIVE_METASTORE_ERROR, "Table directory does not exist: " + tableLocation);
276+
}
277+
}
278+
catch (IOException e) {
279+
throw new PrestoException(HIVE_METASTORE_ERROR, "Could not validate table location", e);
280+
}
272281
}
273282

274283
writeSchemaFile("table", tableMetadataDirectory, tableCodec, new TableMetadata(table), false);
@@ -1189,25 +1198,13 @@ private synchronized void setTablePrivileges(
11891198
requireNonNull(tableName, "tableName is null");
11901199
requireNonNull(privileges, "privileges is null");
11911200

1192-
try {
1193-
Table table = getRequiredTable(metastoreContext, databaseName, tableName);
1194-
1195-
Path permissionsDirectory = getPermissionsDirectory(table);
1196-
1197-
metadataFileSystem.mkdirs(permissionsDirectory);
1198-
if (!metadataFileSystem.isDirectory(permissionsDirectory)) {
1199-
throw new PrestoException(HIVE_METASTORE_ERROR, "Could not create permissions directory");
1200-
}
1201+
Table table = getRequiredTable(metastoreContext, databaseName, tableName);
12011202

1202-
Path permissionFilePath = getPermissionsPath(permissionsDirectory, grantee);
1203-
List<PermissionMetadata> permissions = privileges.stream()
1204-
.map(PermissionMetadata::new)
1205-
.collect(toList());
1206-
writeFile("permissions", permissionFilePath, permissionsCodec, permissions, true);
1207-
}
1208-
catch (IOException e) {
1209-
throw new PrestoException(HIVE_METASTORE_ERROR, e);
1210-
}
1203+
Path permissionFilePath = getPermissionsPath(getPermissionsDirectory(table), grantee);
1204+
List<PermissionMetadata> permissions = privileges.stream()
1205+
.map(PermissionMetadata::new)
1206+
.collect(toList());
1207+
writeFile("permissions", permissionFilePath, permissionsCodec, permissions, true);
12111208
}
12121209

12131210
private Set<TableConstraint> readConstraintsFile(String databaseName, String tableName)

presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedSmokeTestBase.java

+30-3
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,19 @@
1616
import com.facebook.presto.Session;
1717
import com.facebook.presto.Session.SessionBuilder;
1818
import com.facebook.presto.common.type.TimeZoneKey;
19+
import com.facebook.presto.hive.HdfsEnvironment;
20+
import com.facebook.presto.hive.HiveClientConfig;
21+
import com.facebook.presto.hive.MetastoreClientConfig;
22+
import com.facebook.presto.hive.s3.HiveS3Config;
1923
import com.facebook.presto.metadata.CatalogManager;
2024
import com.facebook.presto.spi.ConnectorId;
2125
import com.facebook.presto.spi.ConnectorSession;
26+
import com.facebook.presto.spi.SchemaTableName;
2227
import com.facebook.presto.testing.MaterializedResult;
2328
import com.facebook.presto.testing.QueryRunner;
2429
import com.facebook.presto.testing.assertions.Assert;
2530
import com.facebook.presto.tests.AbstractTestIntegrationSmokeTest;
31+
import org.apache.hadoop.fs.FileSystem;
2632
import org.apache.hadoop.fs.Path;
2733
import org.apache.iceberg.Table;
2834
import org.apache.iceberg.UpdateProperties;
@@ -43,7 +49,8 @@
4349
import static com.facebook.presto.iceberg.IcebergQueryRunner.getIcebergDataDirectoryPath;
4450
import static com.facebook.presto.iceberg.IcebergUtil.MIN_FORMAT_VERSION_FOR_DELETE;
4551
import static com.facebook.presto.iceberg.procedure.RegisterTableProcedure.METADATA_FOLDER_NAME;
46-
import static com.facebook.presto.iceberg.procedure.TestIcebergRegisterAndUnregisterProcedure.getMetadataFileLocation;
52+
import static com.facebook.presto.iceberg.procedure.RegisterTableProcedure.getFileSystem;
53+
import static com.facebook.presto.iceberg.procedure.RegisterTableProcedure.resolveLatestMetadataLocation;
4754
import static com.facebook.presto.testing.MaterializedResult.resultBuilder;
4855
import static com.google.common.base.Preconditions.checkArgument;
4956
import static com.google.common.collect.Iterables.getOnlyElement;
@@ -53,6 +60,7 @@
5360
import static java.util.Objects.requireNonNull;
5461
import static java.util.stream.Collectors.joining;
5562
import static java.util.stream.IntStream.range;
63+
import static org.apache.iceberg.util.LocationUtil.stripTrailingSlash;
5664
import static org.assertj.core.api.Assertions.assertThat;
5765
import static org.assertj.core.api.Assertions.assertThatThrownBy;
5866
import static org.testng.Assert.assertEquals;
@@ -1606,7 +1614,7 @@ public void testRegisterTableWithFileName()
16061614
assertUpdate("INSERT INTO " + tableName + " VALUES(1, 1)", 1);
16071615

16081616
String metadataLocation = getLocation(schemaName, tableName);
1609-
String metadataFileName = getMetadataFileLocation(getSession().toConnectorSession(), schemaName, tableName, metadataLocation);
1617+
String metadataFileName = getMetadataFileLocation(getSession().toConnectorSession(), getHdfsEnvironment(), schemaName, tableName, metadataLocation);
16101618

16111619
// Register new table with procedure
16121620
String newTableName = tableName + "_new";
@@ -1625,7 +1633,7 @@ public void testRegisterTableWithInvalidLocation()
16251633
assertUpdate("CREATE TABLE " + tableName + " (id integer, value integer)");
16261634
assertUpdate("INSERT INTO " + tableName + " VALUES(1, 1)", 1);
16271635

1628-
String metadataLocation = getLocation(schemaName, tableName).replace("//", "") + "_invalid";
1636+
String metadataLocation = getLocation(schemaName, tableName) + "_invalid";
16291637

16301638
@Language("RegExp") String errorMessage = format("Unable to find metadata at location %s/%s", metadataLocation, METADATA_FOLDER_NAME);
16311639
assertQueryFails("CALL system.register_table ('" + schemaName + "', '" + tableName + "', '" + metadataLocation + "')", errorMessage);
@@ -2020,4 +2028,23 @@ public void testUpdateNonExistentTable()
20202028
assertQueryFails("ALTER TABLE non_existent_test_table2 SET PROPERTIES (commit_retries = 6)",
20212029
format("Table does not exist: iceberg.%s.non_existent_test_table2", getSession().getSchema().get()));
20222030
}
2031+
2032+
protected HdfsEnvironment getHdfsEnvironment()
2033+
{
2034+
HiveClientConfig hiveClientConfig = new HiveClientConfig();
2035+
MetastoreClientConfig metastoreClientConfig = new MetastoreClientConfig();
2036+
HiveS3Config hiveS3Config = new HiveS3Config();
2037+
return IcebergDistributedTestBase.getHdfsEnvironment(hiveClientConfig, metastoreClientConfig, hiveS3Config);
2038+
}
2039+
2040+
private static String getMetadataFileLocation(ConnectorSession session, HdfsEnvironment hdfsEnvironment, String schema, String table, String metadataLocation)
2041+
{
2042+
metadataLocation = stripTrailingSlash(metadataLocation);
2043+
org.apache.hadoop.fs.Path metadataDir = new org.apache.hadoop.fs.Path(metadataLocation, METADATA_FOLDER_NAME);
2044+
FileSystem fileSystem = getFileSystem(session, hdfsEnvironment, new SchemaTableName(schema, table), metadataDir);
2045+
return resolveLatestMetadataLocation(
2046+
session,
2047+
fileSystem,
2048+
metadataDir).getName();
2049+
}
20232050
}

presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergSmokeOnS3Hadoop.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ public TestIcebergSmokeOnS3Hadoop()
6969
{
7070
super(HADOOP);
7171
bucketName = "forhadoop-" + randomTableSuffix();
72-
catalogWarehouseDir = createTempDirectory(bucketName).toUri().toString();
72+
catalogWarehouseDir = new Path(createTempDirectory(bucketName).toUri()).toString();
7373
}
7474

7575
protected QueryRunner createQueryRunner()
@@ -453,7 +453,7 @@ public void testTableComments()
453453
protected String getLocation(String schema, String table)
454454
{
455455
Path tempLocation = getCatalogDirectory();
456-
return format("%s/%s/%s", tempLocation.toUri(), schema, table);
456+
return format("%s/%s/%s", tempLocation.toString(), schema, table);
457457
}
458458

459459
@Override

presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergSmokeHive.java

-18
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,6 @@
1313
*/
1414
package com.facebook.presto.iceberg.hive;
1515

16-
import com.facebook.presto.hive.HdfsConfiguration;
17-
import com.facebook.presto.hive.HdfsConfigurationInitializer;
18-
import com.facebook.presto.hive.HdfsEnvironment;
19-
import com.facebook.presto.hive.HiveClientConfig;
20-
import com.facebook.presto.hive.HiveHdfsConfiguration;
21-
import com.facebook.presto.hive.MetastoreClientConfig;
22-
import com.facebook.presto.hive.authentication.NoHdfsAuthentication;
2316
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
2417
import com.facebook.presto.iceberg.IcebergConfig;
2518
import com.facebook.presto.iceberg.IcebergDistributedSmokeTestBase;
@@ -28,7 +21,6 @@
2821
import com.facebook.presto.spi.ConnectorSession;
2922
import com.facebook.presto.spi.SchemaTableName;
3023
import com.facebook.presto.tests.DistributedQueryRunner;
31-
import com.google.common.collect.ImmutableSet;
3224
import org.apache.iceberg.Table;
3325

3426
import java.io.File;
@@ -55,16 +47,6 @@ protected String getLocation(String schema, String table)
5547
return format("%s%s/%s", tempLocation.toURI(), schema, table);
5648
}
5749

58-
protected static HdfsEnvironment getHdfsEnvironment()
59-
{
60-
HiveClientConfig hiveClientConfig = new HiveClientConfig();
61-
MetastoreClientConfig metastoreClientConfig = new MetastoreClientConfig();
62-
HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveClientConfig, metastoreClientConfig),
63-
ImmutableSet.of(),
64-
hiveClientConfig);
65-
return new HdfsEnvironment(hdfsConfiguration, metastoreClientConfig, new NoHdfsAuthentication());
66-
}
67-
6850
protected ExtendedHiveMetastore getFileHiveMetastore()
6951
{
7052
IcebergFileHiveMetastore fileHiveMetastore = new IcebergFileHiveMetastore(getHdfsEnvironment(),

0 commit comments

Comments
 (0)