Skip to content

Commit

Permalink
Address comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
dongjoon-hyun committed Oct 13, 2017
1 parent 8ac1acf commit ef2123e
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
// SPARK-8501: Empty ORC files always have an empty schema stored in their footer. In this
// case, `OrcFileOperator.readSchema` returns `None`, and we can't read the underlying file
// using the given physical schema. Instead, we simply return an empty iterator.
val maybePhysicalSchema = OrcFileOperator.readSchema(Seq(file.filePath), Some(conf))
if (maybePhysicalSchema.isEmpty) {
val isEmptyFile = OrcFileOperator.readSchema(Seq(file.filePath), Some(conf)).isEmpty
if (isEmptyFile) {
Iterator.empty
} else {
OrcRelation.setRequiredColumns(conf, dataSchema, requiredSchema)
Expand Down Expand Up @@ -284,10 +284,7 @@ private[orc] object OrcRelation extends HiveInspectors {
case (field, ordinal) =>
var ref = oi.getStructFieldRef(field.name)
if (ref == null) {
val maybeIndex = dataSchema.getFieldIndex(field.name)
if (maybeIndex.isDefined) {
ref = oi.getStructFieldRef("_col" + maybeIndex.get)
}
ref = oi.getStructFieldRef("_col" + dataSchema.fieldIndex(field.name))
}
ref -> ordinal
}.unzip
Expand All @@ -300,7 +297,7 @@ private[orc] object OrcRelation extends HiveInspectors {
val length = fieldRefs.length
while (i < length) {
val fieldRef = fieldRefs(i)
val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRefs(i))
val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRef)
if (fieldValue == null) {
mutableRow.setNullAt(fieldOrdinals(i))
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2078,7 +2078,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
)

checkAnswer(
sql(s"SELECT * FROM $db.t"),
sql(s"SELECT click_id, search_id, uid, ts, hour FROM $db.t"),
Row("12", "2", 12345, "98765", "01"))

client.runSqlHive(s"ALTER TABLE $db.t ADD COLUMNS (dummy string)")
Expand All @@ -2100,30 +2100,10 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
Row(null, "12"))

checkAnswer(
sql(s"SELECT * FROM $db.t"),
sql(s"SELECT click_id, search_id, uid, dummy, ts, hour FROM $db.t"),
Row("12", "2", 12345, null, "98765", "01"))
}
}
}
}

// This test case is added to prevent regression.
test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") {
  withTempDir { dir =>
    val orcPath = dir.getCanonicalPath

    // Write a two-column ORC file whose physical column order is (c1, c2).
    Seq(1 -> 2).toDF("c1", "c2").write.format("orc").mode("overwrite").save(orcPath)
    checkAnswer(spark.read.orc(orcPath), Row(1, 2))

    // Exercise both the converted (data-source) and non-converted (Hive SerDe) read paths.
    Seq("true", "false").foreach { convertMetastoreOrc =>
      withTable("t") {
        withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convertMetastoreOrc) {
          // Declare the table with the columns in the opposite (c2, c1) order.
          sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$orcPath'")
          // The correct answer is Row(2, 1). SPARK-22267 should fix this later.
          val expected = if (convertMetastoreOrc == "true") Row(2, 1) else Row(1, 2)
          checkAnswer(spark.table("t"), expected)
        }
      }
    }
  }
}
}

0 comments on commit ef2123e

Please sign in to comment.