diff --git a/.idea/icon.png b/.idea/icon.png deleted file mode 100644 index 6e1312485efa8..0000000000000 Binary files a/.idea/icon.png and /dev/null differ diff --git a/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java b/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java index 1ec09d5996edb..cf719ded38ead 100644 --- a/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java +++ b/presto-hdfs-core/src/main/java/com/facebook/presto/hive/HiveFileInfo.java @@ -18,7 +18,6 @@ import com.facebook.drift.annotations.ThriftStruct; import com.google.common.collect.ImmutableMap; import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; import org.openjdk.jol.info.ClassLayout; import java.io.IOException; @@ -69,7 +68,7 @@ public static HiveFileInfo createHiveFileInfo(LocatedFileStatus locatedFileStatu @ThriftConstructor public HiveFileInfo( - String pathString, + String path, boolean directory, List blockLocations, long length, @@ -77,7 +76,7 @@ public HiveFileInfo( Optional extraFileInfo, Map customSplitInfo) { - this.path = requireNonNull(pathString, "pathString is null"); + this.path = requireNonNull(path, "path is null"); this.isDirectory = directory; this.blockLocations = requireNonNull(blockLocations, "blockLocations is null"); this.length = length; @@ -87,9 +86,9 @@ public HiveFileInfo( } @ThriftField(1) - public String getPathString() + public String getPath() { - return path.toString(); + return path; } @ThriftField(2) @@ -128,11 +127,6 @@ public Map getCustomSplitInfo() return customSplitInfo; } - public Path getPath() - { - return new Path(path); - } - public long getRetainedSizeInBytes() { long blockLocationsSizeInBytes = blockLocations.stream().map(BlockLocation::getRetainedSizeInBytes).reduce(0L, Long::sum); @@ -141,6 +135,16 @@ public long getRetainedSizeInBytes() return INSTANCE_SIZE + path.length() + blockLocationsSizeInBytes + extraFileInfoSizeInBytes + customSplitInfoSizeInBytes; } + public String getParent() + { + return path.substring(0, path.lastIndexOf('/')); + } + + public String getFileName() + { + return path.substring(path.lastIndexOf('/') + 1); + } + @Override public boolean equals(Object o) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java index b4ec1d8dcb578..9e5eebe0d8e5e 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveBucketing.java @@ -32,7 +32,6 @@ import com.google.common.primitives.Shorts; import com.google.common.primitives.SignedBytes; import io.airlift.slice.Slice; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -76,10 +75,10 @@ public final class HiveBucketing private HiveBucketing() {} - public static int getVirtualBucketNumber(int bucketCount, Path path) + public static int getVirtualBucketNumber(int bucketCount, String path) { // this is equivalent to bucketing the table on a VARCHAR column containing $path - return (hashBytes(0, utf8Slice(path.toString())) & Integer.MAX_VALUE) % bucketCount; + return (hashBytes(0, utf8Slice(path)) & Integer.MAX_VALUE) % bucketCount; } public static int getBucket(int bucketCount, List types, Page page, int position) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java index 5b9894839ec29..b2d00458aaa82 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitPartitionInfo.java @@ -16,17 +16,13 @@ import com.facebook.presto.hive.metastore.Storage; import com.facebook.presto.spi.ColumnHandle; -import com.facebook.presto.spi.PrestoException; import org.openjdk.jol.info.ClassLayout; -import java.net.URI; -import java.net.URISyntaxException; import java.util.List; import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; -import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static io.airlift.slice.SizeOf.sizeOfObjectArray; import static java.util.Objects.requireNonNull; @@ -39,7 +35,7 @@ public class HiveSplitPartitionInfo private static final int INSTANCE_SIZE = ClassLayout.parseClass(HiveSplitPartitionInfo.class).instanceSize(); private final Storage storage; - private final URI path; + private final String path; private final List partitionKeys; private final String partitionName; private final int partitionDataColumnCount; @@ -53,7 +49,7 @@ public class HiveSplitPartitionInfo HiveSplitPartitionInfo( Storage storage, - URI path, + String path, List partitionKeys, String partitionName, int partitionDataColumnCount, @@ -72,7 +68,7 @@ public class HiveSplitPartitionInfo requireNonNull(rowIdPartitionComponent, "rowIdPartitionComponent is null"); this.storage = storage; - this.path = ensurePathHasTrailingSlash(path); + this.path = path; this.partitionKeys = partitionKeys; this.partitionName = partitionName; this.partitionDataColumnCount = partitionDataColumnCount; @@ -82,25 +78,6 @@ public class HiveSplitPartitionInfo this.rowIdPartitionComponent = rowIdPartitionComponent; } - // Hadoop path strips trailing slashes from the path string, - // and Java URI has a bug where a.resolve(a.relativize(b)) - // doesn't equal 'b' if 'a' had any components after the last slash - // https://bugs.openjdk.java.net/browse/JDK-6523089 - private static URI ensurePathHasTrailingSlash(URI path) - { - // since this is the partition path, it's always a directory. - // it's safe to add a trailing slash - if (!path.getPath().endsWith("/")) { - try { - path = new URI(path.toString() + "/"); - } - catch (URISyntaxException e) { - throw new PrestoException(GENERIC_INTERNAL_ERROR, e); - } - } - return path; - } - public Storage getStorage() { return storage; @@ -164,7 +141,7 @@ public int decrementAndGetReferences() return references.decrementAndGet(); } - public URI getPath() + public String getPath() { return path; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java index 34e879fb4f4af..55ac7edb32081 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java @@ -442,7 +442,7 @@ public static long parseHiveTimestamp(String value, DateTimeZone timeZone) return HIVE_TIMESTAMP_PARSER.withZone(timeZone).parseMillis(value); } - public static boolean isSplittable(InputFormat inputFormat, FileSystem fileSystem, Path path) + public static boolean isSplittable(InputFormat inputFormat, FileSystem fileSystem, String path) { if (inputFormat instanceof OrcInputFormat || inputFormat instanceof RCFileInputFormat) { return true; @@ -464,14 +464,14 @@ public static boolean isSplittable(InputFormat inputFormat, FileSystem fil } try { method.setAccessible(true); - return (boolean) method.invoke(inputFormat, fileSystem, path); + return (boolean) method.invoke(inputFormat, fileSystem, new Path(path)); } catch (InvocationTargetException | IllegalAccessException e) { throw new RuntimeException(e); } } - public static boolean isSelectSplittable(InputFormat inputFormat, Path path, boolean s3SelectPushdownEnabled) + public static boolean isSelectSplittable(InputFormat inputFormat, String path, boolean s3SelectPushdownEnabled) { // S3 Select supports splitting for uncompressed CSV & JSON files // Previous checks for supported input formats, SerDes, column types and S3 path @@ -479,10 +479,10 @@ public static boolean isSelectSplittable(InputFormat inputFormat, Path pat return !s3SelectPushdownEnabled || isUncompressed(inputFormat, path); } - private static boolean isUncompressed(InputFormat inputFormat, Path path) + private static boolean isUncompressed(InputFormat inputFormat, String path) { if (inputFormat instanceof TextInputFormat) { - return !getCompressionCodec((TextInputFormat) inputFormat, path).isPresent(); + return !getCompressionCodec((TextInputFormat) inputFormat, new Path(path)).isPresent(); } return false; } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java b/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java index e37a5d967ec95..9e55d561258a9 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/InternalHiveSplit.java @@ -18,7 +18,6 @@ import com.facebook.presto.spi.schedule.NodeSelectionStrategy; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import org.apache.hadoop.fs.Path; import org.openjdk.jol.info.ClassLayout; import javax.annotation.concurrent.NotThreadSafe; @@ -32,8 +31,8 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static io.airlift.slice.SizeOf.sizeOf; +import static io.airlift.slice.SizeOf.sizeOfCharArray; import static io.airlift.slice.SizeOf.sizeOfObjectArray; -import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; @NotThreadSafe @@ -45,7 +44,7 @@ public class InternalHiveSplit private static final int HOST_ADDRESS_INSTANCE_SIZE = ClassLayout.parseClass(HostAddress.class).instanceSize() + ClassLayout.parseClass(String.class).instanceSize(); - private final byte[] relativeUri; + private final String path; private final long end; private final long fileSize; private final long fileModifiedTime; @@ -72,7 +71,7 @@ public class InternalHiveSplit private int currentBlockIndex; public InternalHiveSplit( - String relativeUri, + String path, long start, long end, long fileSize, @@ -92,7 +91,7 @@ public InternalHiveSplit( checkArgument(end >= 0, "end must be non-negative"); checkArgument(fileSize >= 0, "fileSize must be non-negative"); checkArgument(fileModifiedTime >= 0, "fileModifiedTime must be non-negative"); - requireNonNull(relativeUri, "relativeUri is null"); + requireNonNull(path, "path is null"); requireNonNull(readBucketNumber, "readBucketNumber is null"); requireNonNull(tableBucketNumber, "tableBucketNumber is null"); requireNonNull(nodeSelectionStrategy, "nodeSelectionStrategy is null"); @@ -100,7 +99,7 @@ public InternalHiveSplit( requireNonNull(extraFileInfo, "extraFileInfo is null"); requireNonNull(encryptionInformation, "encryptionInformation is null"); - this.relativeUri = relativeUri.getBytes(UTF_8); + this.path = path; this.start = start; this.end = end; this.fileSize = fileSize; @@ -113,7 +112,7 @@ public InternalHiveSplit( this.partitionInfo = partitionInfo; this.extraFileInfo = extraFileInfo; this.customSplitInfo = ImmutableMap - .copyOf(requireNonNull(customSplitInfo, "customSplitInfo is null")); + .copyOf(requireNonNull(customSplitInfo, "customSplitInfo is null")); ImmutableList.Builder> addressesBuilder = ImmutableList.builder(); blockEndOffsets = new long[blocks.size()]; @@ -131,8 +130,7 @@ public InternalHiveSplit( public String getPath() { - String relativePathString = new String(relativeUri, UTF_8); - return new Path(partitionInfo.getPath().resolve(relativePathString)).toString(); + return path; } public long getStart() @@ -254,7 +252,7 @@ public void reset() public int getEstimatedSizeInBytes() { int result = INSTANCE_SIZE; - result += sizeOf(relativeUri); + result += sizeOfCharArray(path.length()); result += sizeOf(blockEndOffsets); if (!blockAddresses.isEmpty()) { result += sizeOfObjectArray(blockAddresses.size()); @@ -275,7 +273,7 @@ public int getEstimatedSizeInBytes() public String toString() { return toStringHelper(this) - .add("relativeUri", new String(relativeUri, UTF_8)) + .add("path", path) .add("start", start) .add("end", end) .add("fileSize", fileSize) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java b/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java index 0417c741b8697..b904e21a83abe 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/ManifestPartitionLoader.java @@ -159,7 +159,8 @@ private InternalHiveSplitFactory createInternalHiveSplitFactory( .map(p -> p.getColumns().size()) .orElseGet(table.getDataColumns()::size); List partitionKeys = getPartitionKeys(table, partition.getPartition(), partitionName); - Path path = new Path(getPartitionLocation(table, partition.getPartition())); + String location = getPartitionLocation(table, partition.getPartition()); + Path path = new Path(location); Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path); InputFormat inputFormat = getInputFormat(configuration, inputFormatName, false); ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, path); @@ -173,7 +174,7 @@ private InternalHiveSplitFactory createInternalHiveSplitFactory( false, new HiveSplitPartitionInfo( storage, - path.toUri(), + location, partitionKeys, partitionName, partitionDataColumnCount, @@ -201,7 +202,7 @@ private void validateManifest(ConnectorSession session, HivePartitionMetadata pa int fileCount = 0; while (fileInfoIterator.hasNext()) { HiveFileInfo fileInfo = fileInfoIterator.next(); - String fileName = fileInfo.getPath().getName(); + String fileName = fileInfo.getFileName(); if (!manifestFileNames.contains(fileName)) { throw new PrestoException( MALFORMED_HIVE_FILE_STATISTICS, diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java index 6a4052656ff46..110b7cb415505 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/StoragePartitionLoader.java @@ -160,16 +160,16 @@ public StoragePartitionLoader( } private ListenableFuture handleSymlinkTextInputFormat(ExtendedFileSystem fs, - Path path, - InputFormat inputFormat, - boolean s3SelectPushdownEnabled, - Storage storage, - List partitionKeys, - String partitionName, - int partitionDataColumnCount, - boolean stopped, - HivePartitionMetadata partition, - HiveSplitSource hiveSplitSource) + Path path, + InputFormat inputFormat, + boolean s3SelectPushdownEnabled, + Storage storage, + List partitionKeys, + String partitionName, + int partitionDataColumnCount, + boolean stopped, + HivePartitionMetadata partition, + HiveSplitSource hiveSplitSource) throws IOException { if (tableBucketInfo.isPresent()) { @@ -191,7 +191,7 @@ private ListenableFuture handleSymlinkTextInputFormat(ExtendedFileSystem fs, FileInputFormat.setInputPaths(targetJob, targetPath); InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0); - InternalHiveSplitFactory splitFactory = getHiveSplitFactory(fs, inputFormat, s3SelectPushdownEnabled, storage, path, partitionName, + InternalHiveSplitFactory splitFactory = getHiveSplitFactory(fs, inputFormat, s3SelectPushdownEnabled, storage, path.toUri().toString(), partitionName, partitionKeys, partitionDataColumnCount, partition, Optional.empty()); lastResult = addSplitsToSource(targetSplits, splitFactory, hiveSplitSource, stopped); if (stopped) { @@ -202,12 +202,12 @@ private ListenableFuture handleSymlinkTextInputFormat(ExtendedFileSystem fs, } private ListenableFuture handleGetSplitsFromInputFormat(Configuration configuration, - Path path, - Properties schema, - InputFormat inputFormat, - boolean stopped, - HiveSplitSource hiveSplitSource, - InternalHiveSplitFactory splitFactory) + Path path, + Properties schema, + InputFormat inputFormat, + boolean stopped, + HiveSplitSource hiveSplitSource, + InternalHiveSplitFactory splitFactory) throws IOException { if (tableBucketInfo.isPresent()) { @@ -224,15 +224,15 @@ private ListenableFuture handleGetSplitsFromInputFormat(Configuration configu } private InternalHiveSplitFactory getHiveSplitFactory(ExtendedFileSystem fs, - InputFormat inputFormat, - boolean s3SelectPushdownEnabled, - Storage storage, - Path path, - String partitionName, - List partitionKeys, - int partitionDataColumnCount, - HivePartitionMetadata partition, - Optional bucketConversion) + InputFormat inputFormat, + boolean s3SelectPushdownEnabled, + Storage storage, + String path, + String partitionName, + List partitionKeys, + int partitionDataColumnCount, + HivePartitionMetadata partition, + Optional bucketConversion) { return new InternalHiveSplitFactory( fs, @@ -243,7 +243,7 @@ private InternalHiveSplitFactory getHiveSplitFactory(ExtendedFileSystem fs, s3SelectPushdownEnabled, new HiveSplitPartitionInfo( storage, - path.toUri(), + path, partitionKeys, partitionName, partitionDataColumnCount, @@ -315,7 +315,7 @@ public ListenableFuture loadPartition(HivePartitionMetadata partition, HiveSp inputFormat, s3SelectPushdownEnabled, storage, - path, + location, partitionName, partitionKeys, partitionDataColumnCount, @@ -438,15 +438,15 @@ private List getBucketedSplits( } private ListMultimap computeBucketToFileInfoMapping(List fileInfos, - int partitionBucketCount, - String partitionName) + int partitionBucketCount, + String partitionName) { ListMultimap bucketToFileInfo = ArrayListMultimap.create(); if (!shouldCreateFilesForMissingBuckets(table, session)) { fileInfos.stream() .forEach(fileInfo -> { - String fileName = fileInfo.getPath().getName(); + String fileName = fileInfo.getFileName(); OptionalInt bucket = getBucketNumber(fileName); if (bucket.isPresent()) { bucketToFileInfo.put(bucket.getAsInt(), fileInfo); @@ -459,7 +459,7 @@ private ListMultimap computeBucketToFileInfoMapping(List< else { // build mapping of file name to bucket for (HiveFileInfo file : fileInfos) { - String fileName = file.getPath().getName(); + String fileName = file.getFileName(); OptionalInt bucket = getBucketNumber(fileName); if (bucket.isPresent()) { bucketToFileInfo.put(bucket.getAsInt(), file); @@ -478,10 +478,10 @@ private ListMultimap computeBucketToFileInfoMapping(List< partitionBucketCount, partitionName)); } - if (fileInfos.get(0).getPath().getName().matches("\\d+")) { + if (fileInfos.get(0).getFileName().matches("\\d+")) { try { // File names are integer if they are created when file_renaming_enabled is set to true - fileInfos.sort(Comparator.comparingInt(fileInfo -> Integer.parseInt(fileInfo.getPath().getName()))); + fileInfos.sort(Comparator.comparingInt(fileInfo -> Integer.parseInt(fileInfo.getFileName()))); } catch (NumberFormatException e) { throw new PrestoException( @@ -509,10 +509,10 @@ private ListMultimap computeBucketToFileInfoMapping(List< } private List convertFilesToInternalSplits(BucketSplitInfo bucketSplitInfo, - Optional bucketConversion, - ListMultimap bucketToFileInfo, - InternalHiveSplitFactory splitFactory, - boolean splittable) + Optional bucketConversion, + ListMultimap bucketToFileInfo, + InternalHiveSplitFactory splitFactory, + boolean splittable) { int readBucketCount = bucketSplitInfo.getReadBucketCount(); int tableBucketCount = bucketSplitInfo.getTableBucketCount(); @@ -595,7 +595,7 @@ private List getTargetPathsFromSymlink(ExtendedFileSystem fileSystem, Path List manifestFileInfos = ImmutableList.copyOf(directoryLister.list(fileSystem, table, symlinkDir, partition, namenodeStats, hiveDirectoryContext)); for (HiveFileInfo symlink : manifestFileInfos) { - try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(symlink.getPath()), StandardCharsets.UTF_8))) { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(symlink.getPath())), StandardCharsets.UTF_8))) { CharStreams.readLines(reader).stream() .map(Path::new) .forEach(targets::add); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java b/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java index a0adf59b7aeaa..2130f52ee933d 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/s3select/S3SelectPushdown.java @@ -87,10 +87,10 @@ private static boolean isInputFormatSupported(Properties schema) return SUPPORTED_INPUT_FORMATS.contains(inputFormat); } - public static boolean isCompressionCodecSupported(InputFormat inputFormat, Path path) + public static boolean isCompressionCodecSupported(InputFormat inputFormat, String path) { if (inputFormat instanceof TextInputFormat) { - return getCompressionCodec((TextInputFormat) inputFormat, path) + return getCompressionCodec((TextInputFormat) inputFormat, new Path(path)) .map(codec -> (codec instanceof GzipCodec) || (codec instanceof BZip2Codec)) .orElse(true); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java index a101f8b8919d7..b61e703bb0035 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/ParquetQuickStatsBuilder.java @@ -323,7 +323,7 @@ public PartitionQuickStats buildQuickStats(ConnectorSession session, ExtendedHiv while (files.hasNext()) { HiveFileInfo file = files.next(); filesCount++; - Path path = file.getPath(); + Path path = new Path(file.getPath()); long fileSize = file.getLength(); HiveFileContext hiveFileContext = new HiveFileContext( diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java index c9d0d5ab936d4..b294e87dda1a8 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/HiveFileIterator.java @@ -69,7 +69,7 @@ protected HiveFileInfo computeNext() HiveFileInfo fileInfo = getLocatedFileStatus(remoteIterator); // Ignore hidden files and directories. Hive ignores files starting with _ and . as well. - String fileName = fileInfo.getPath().getName(); + String fileName = fileInfo.getFileName(); if (fileName.startsWith("_") || fileName.startsWith(".") || (fileInfo.getLength() == 0 && skipEmptyFiles)) { continue; } @@ -79,7 +79,7 @@ protected HiveFileInfo computeNext() case IGNORED: continue; case RECURSE: - paths.add(fileInfo.getPath()); + paths.add(new Path(fileInfo.getPath())); continue; case FAIL: throw new NestedDirectoryNotAllowedException(); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java index 41a7fc01fc988..59b29074a3b4d 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java @@ -27,12 +27,10 @@ import io.airlift.units.DataSize; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputFormat; import java.io.IOException; -import java.net.URI; import java.util.List; import java.util.Map; import java.util.Optional; @@ -123,7 +121,7 @@ public Optional createInternalHiveSplit(FileSplit split) FileStatus file = fileSystem.getFileStatus(split.getPath()); Map customSplitInfo = extractCustomSplitInfo(split); return createInternalHiveSplit( - split.getPath(), + split.getPath().toUri().toString(), fromHiveBlockLocations(fileSystem.getFileBlockLocations(file, split.getStart(), split.getLength())).toArray(new BlockLocation[0]), split.getStart(), split.getLength(), @@ -137,7 +135,7 @@ public Optional createInternalHiveSplit(FileSplit split) } private Optional createInternalHiveSplit( - Path path, + String path, BlockLocation[] blockLocations, long start, long length, @@ -149,9 +147,7 @@ private Optional createInternalHiveSplit( Optional extraFileInfo, Map customSplitInfo) { - String pathString = path.toString(); - - if (!infoColumnsMatchPredicates(infoColumnConstraints, pathString, fileSize, fileModificationTime)) { + if (!infoColumnsMatchPredicates(infoColumnConstraints, path, fileSize, fileModificationTime)) { return Optional.empty(); } @@ -197,9 +193,8 @@ private Optional createInternalHiveSplit( blocks = ImmutableList.of(new InternalHiveBlock(start + length, addresses)); } - URI relativePath = partitionInfo.getPath().relativize(path.toUri()); return Optional.of(new InternalHiveSplit( - relativePath.toString(), + path, start, start + length, fileSize, @@ -249,9 +244,9 @@ private static List getHostAddresses(BlockLocation blockLocation) } private static boolean infoColumnsMatchPredicates(Map constraints, - String path, - long fileSize, - long fileModificationTime) + String path, + long fileSize, + long fileModificationTime) { if (constraints.isEmpty()) { return true; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java index b7f2d0e58408e..9be421b8d3632 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitManager.java @@ -815,7 +815,7 @@ public Iterator list(ExtendedFileSystem fileSystem, Table table, P return ImmutableList.of( createHiveFileInfo( new LocatedFileStatus( - new FileStatus(0, false, 1, 0, 0, path), + new FileStatus(0, false, 1, 0, 0, new Path(path.toString() + "/" + "test_file_name")), new BlockLocation[] {}), Optional.empty())) .iterator(); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java index 7411ba49428f6..d3e3627f3732f 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveSplitSource.java @@ -25,7 +25,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.SettableFuture; import io.airlift.units.DataSize; -import org.apache.hadoop.fs.Path; import org.testng.annotations.Test; import java.time.Instant; @@ -150,7 +149,7 @@ public void testAffinitySchedulingKey() // larger than the section size DataSize fileSize = new DataSize(sectionSize.toBytes() * 3, BYTE); - hiveSplitSource.addToQueue(new TestSplit("test-relative-path", 1, OptionalInt.empty(), fileSize, SOFT_AFFINITY)); + hiveSplitSource.addToQueue(new TestSplit("path/test-relative-path", 1, OptionalInt.empty(), fileSize, SOFT_AFFINITY)); hiveSplitSource.noMoreSplits(); List splits = new ArrayList<>(); @@ -609,7 +608,7 @@ private TestSplit(String path, int id, OptionalInt bucketNumber, DataSize fileSi false, ImmutableMap.of(), ImmutableMap.of()), - new Path("path").toUri(), + "path", ImmutableList.of(), "partition-name", id, diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java index 5dd7276277112..4459d338f40b7 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHudiDirectoryLister.java @@ -128,7 +128,7 @@ public void testDirectoryListerForHudiTable() new RuntimeStats())); assertTrue(fileInfoIterator.hasNext()); HiveFileInfo fileInfo = fileInfoIterator.next(); - assertEquals(fileInfo.getPath().getName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); + assertEquals(fileInfo.getFileName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); } finally { hadoopConf = null; @@ -156,7 +156,7 @@ public void testDirectoryListerForHudiTableWithCopyOnFirstWriteEnabled() new RuntimeStats())); assertTrue(fileInfoIterator.hasNext()); HiveFileInfo fileInfo = fileInfoIterator.next(); - assertEquals(fileInfo.getPath().getName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); + assertEquals(fileInfo.getFileName(), "d0875d00-483d-4e8b-bbbe-c520366c47a0-0_0-6-11_20211217110514527.parquet"); } finally { hadoopConf = null; diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java b/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java index 326f12af98b61..7d2570f1a7817 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/s3select/TestS3SelectPushdown.java @@ -23,7 +23,6 @@ import com.facebook.presto.testing.TestingConnectorSession; import com.google.common.collect.ImmutableMap; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; @@ -95,11 +94,11 @@ public void setUp() @Test public void testIsCompressionCodecSupported() { - assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.gz"))); - assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject"))); - assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.lz4"))); - assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.snappy"))); - assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, new Path("s3://fakeBucket/fakeObject.bz2"))); + assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.gz")); + assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject")); + assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.lz4")); + assertFalse(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.snappy")); + assertTrue(S3SelectPushdown.isCompressionCodecSupported(inputFormat, "s3://fakeBucket/fakeObject.bz2")); } @Test @@ -196,20 +195,20 @@ public void testShouldNotEnableSelectPushdownWhenColumnTypesAreNotSupported() public void testShouldEnableSplits() { // Uncompressed CSV - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.csv"), true)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.csv", true)); // Pushdown disabled - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.csv"), false)); - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.json"), false)); - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.gz"), false)); - assertTrue(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.bz2"), false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.csv", false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.json", false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.gz", false)); + assertTrue(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.bz2", false)); } @Test public void testShouldNotEnableSplits() { // Compressed files - assertFalse(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.gz"), true)); - assertFalse(isSelectSplittable(inputFormat, new Path("s3://fakeBucket/fakeObject.bz2"), true)); + assertFalse(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.gz", true)); + assertFalse(isSelectSplittable(inputFormat, "s3://fakeBucket/fakeObject.bz2", true)); } @AfterClass(alwaysRun = true)