[SPARK-21617][SQL] Store correct table metadata when altering schema in Hive metastore.

For Hive tables, the current "replace the schema" code is the correct path, except that an exception in that path should result in an error, and not in retrying in a different way.

For data source tables, Spark may generate a non-compatible Hive table; but for that to work with Hive 2.1, the detection of data source tables needs to be fixed in the Hive client, to also consider the raw tables used by code such as `alterTableSchema`.

Tested with existing and added unit tests (plus internal tests with a 2.1 metastore).

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes apache#18849 from vanzin/SPARK-21617.

(cherry picked from commit 84b5b16)
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
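For context, this is the path users hit through ALTER TABLE, which Spark persists via the external catalog's `alterTableSchema`. A minimal sketch of that flow, assuming a Hive-enabled SparkSession against a Hive 2.1 metastore (the statements are illustrative and not taken from the patch):

    // Illustrative sketch: adding a column should be recorded in the Hive metastore,
    // and a failure while replacing the schema should surface as an error rather than
    // being retried through a different code path.
    spark.sql("CREATE TABLE t1 (c1 INT) STORED AS parquet")
    spark.sql("ALTER TABLE t1 ADD COLUMNS (c2 INT)")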
1 parent 6af9f83 · commit 77d2d87
Showing 4 changed files with 171 additions and 28 deletions.
126 changes: 126 additions & 0 deletions in sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala
@@ -0,0 +1,126 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.hive.execution

import scala.language.existentials

import org.apache.hadoop.conf.Configuration
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.launcher.SparkLauncher
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.StaticSQLConf._
import org.apache.spark.sql.types._
import org.apache.spark.tags.ExtendedHiveTest
import org.apache.spark.util.Utils

/**
 * A separate set of DDL tests that uses Hive 2.1 libraries, which behave a little differently
 * from the built-in ones.
 */
@ExtendedHiveTest
class Hive_2_1_DDLSuite extends SparkFunSuite with TestHiveSingleton with BeforeAndAfterEach
  with BeforeAndAfterAll {

  // Create a custom HiveExternalCatalog instance with the desired configuration. We cannot
  // use SparkSession here since there's already an active one managed by the TestHive object.
  private var catalog = {
    val warehouse = Utils.createTempDir()
    val metastore = Utils.createTempDir()
    metastore.delete()
    val sparkConf = new SparkConf()
      .set(SparkLauncher.SPARK_MASTER, "local")
      .set(WAREHOUSE_PATH.key, warehouse.toURI().toString())
      .set(CATALOG_IMPLEMENTATION.key, "hive")
      .set(HiveUtils.HIVE_METASTORE_VERSION.key, "2.1")
      .set(HiveUtils.HIVE_METASTORE_JARS.key, "maven")

    val hadoopConf = new Configuration()
    hadoopConf.set("hive.metastore.warehouse.dir", warehouse.toURI().toString())
    hadoopConf.set("javax.jdo.option.ConnectionURL",
      s"jdbc:derby:;databaseName=${metastore.getAbsolutePath()};create=true")
    // These options are needed since the defaults in Hive 2.1 cause exceptions with an
    // empty metastore db.
    hadoopConf.set("datanucleus.schema.autoCreateAll", "true")
    hadoopConf.set("hive.metastore.schema.verification", "false")

    new HiveExternalCatalog(sparkConf, hadoopConf)
  }

  override def afterEach(): Unit = {
    catalog.listTables("default").foreach { t =>
      catalog.dropTable("default", t, true, false)
    }
    spark.sessionState.catalog.reset()
  }

  override def afterAll(): Unit = {
    catalog = null
  }

  test("SPARK-21617: ALTER TABLE for non-compatible DataSource tables") {
    testAlterTable(
      "t1",
      "CREATE TABLE t1 (c1 int) USING json",
      StructType(Array(StructField("c1", IntegerType), StructField("c2", IntegerType))),
      hiveCompatible = false)
  }

  test("SPARK-21617: ALTER TABLE for Hive-compatible DataSource tables") {
    testAlterTable(
      "t1",
      "CREATE TABLE t1 (c1 int) USING parquet",
      StructType(Array(StructField("c1", IntegerType), StructField("c2", IntegerType))))
  }

  test("SPARK-21617: ALTER TABLE for Hive tables") {
    testAlterTable(
      "t1",
      "CREATE TABLE t1 (c1 int) STORED AS parquet",
      StructType(Array(StructField("c1", IntegerType), StructField("c2", IntegerType))))
  }

  test("SPARK-21617: ALTER TABLE with incompatible schema on Hive-compatible table") {
    val exception = intercept[AnalysisException] {
      testAlterTable(
        "t1",
        "CREATE TABLE t1 (c1 string) USING parquet",
        StructType(Array(StructField("c2", IntegerType))))
    }
    assert(exception.getMessage().contains("types incompatible with the existing columns"))
  }

  /**
   * Creates the table with the given statement through the TestHive session, copies its
   * metadata into the Hive 2.1-backed catalog, alters the schema there, and verifies that
   * the stored table reports the updated column names.
   */
  private def testAlterTable(
      tableName: String,
      createTableStmt: String,
      updatedSchema: StructType,
      hiveCompatible: Boolean = true): Unit = {
    spark.sql(createTableStmt)
    val oldTable = spark.sessionState.catalog.externalCatalog.getTable("default", tableName)
    catalog.createTable(oldTable, true)
    catalog.alterTableSchema("default", tableName, updatedSchema)

    val updatedTable = catalog.getTable("default", tableName)
    assert(updatedTable.schema.fieldNames === updatedSchema.fieldNames)
  }

}
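For completeness, the incompatible-schema case in the last test can also be expressed directly against the catalog once a Hive-compatible table exists; a minimal sketch reusing only calls that already appear in the suite (table and column names are illustrative):

    // Illustrative sketch: with a Hive 2.1 metastore, replacing a string column with an
    // int column should fail rather than silently rewriting the table metadata.
    val badSchema = StructType(Array(StructField("c2", IntegerType)))
    val e = intercept[AnalysisException] {
      catalog.alterTableSchema("default", "t1", badSchema)
    }
    assert(e.getMessage.contains("types incompatible with the existing columns"))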