Commit 161ba7e

[SPARK-22146] FileNotFoundException while reading ORC files containing special characters

## What changes were proposed in this pull request?

Reading ORC files whose paths contain special characters such as '%' fails with a FileNotFoundException. This PR fixes the problem.
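The root cause is how the reader serializes Hadoop paths: `Path.toUri.toString` percent-encodes special characters such as '%', and re-parsing that string as a new `Path` encodes it a second time, so the reader probes a location that does not exist, while plain `Path.toString` round-trips losslessly. A minimal sketch of the mismatch (illustrative only; the path below is made up):

```scala
import org.apache.hadoop.fs.Path

val p = new Path("/tmp/sp&cial%chars/part-00000") // hypothetical data file

// toUri.toString escapes '%' as "%25"; feeding that string back into a new
// Path escapes it again, so it no longer refers to the original file.
val viaUri = new Path(p.toUri.toString)
println(viaUri)  // /tmp/sp&cial%25chars/part-00000 -> FileNotFoundException

// toString preserves the unescaped path, so the round trip is lossless.
val viaPath = new Path(p.toString)
println(viaPath) // /tmp/sp&cial%chars/part-00000
```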

## How was this patch tested?

Added a unit test covering the ORC, Parquet, CSV, JSON, and text data sources.

Author: Marco Gaido <marcogaido91@gmail.com>
Author: Marco Gaido <mgaido@hortonworks.com>

Closes #19368 from mgaido91/SPARK-22146.
mgaido91 authored and gatorsmile committed Sep 29, 2017
1 parent 323806e commit 161ba7e
Showing 2 changed files with 12 additions and 2 deletions.
OrcFileFormat.scala

```diff
@@ -58,7 +58,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
     OrcFileOperator.readSchema(
-      files.map(_.getPath.toUri.toString),
+      files.map(_.getPath.toString),
       Some(sparkSession.sessionState.newHadoopConf())
     )
   }
```
MetastoreDataSourcesSuite.scala

```diff
@@ -993,7 +993,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     spark.sql("""drop database if exists testdb8156 CASCADE""")
   }
 
-
   test("skip hive metadata on table creation") {
     withTempDir { tempPath =>
       val schema = StructType((1 to 5).map(i => StructField(s"c_$i", StringType)))
```
```diff
@@ -1345,6 +1344,17 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     }
   }
 
+  Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
+    test(s"SPARK-22146: read files containing special characters using $format") {
+      val nameWithSpecialChars = s"sp&cial%chars"
+      withTempDir { dir =>
+        val tmpFile = s"$dir/$nameWithSpecialChars"
+        spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile)
+        spark.read.format(format).load(tmpFile)
+      }
+    }
+  }
+
   private def withDebugMode(f: => Unit): Unit = {
     val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
     try {
```
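The test above only asserts that the read does not throw. A possible strengthening (not part of this patch) would be to also check the round-tripped contents via `checkAnswer` from `QueryTest`, which this suite already extends:

```scala
import org.apache.spark.sql.Row

// Hypothetical extra assertion inside the test body: verify the data
// survives the write/read round trip rather than only that load() works.
val df = spark.read.format(format).load(tmpFile)
checkAnswer(df, Row("a") :: Row("b") :: Nil)
```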
