Skip to content

Commit

Permalink
[SPARK-23525][SQL] Support ALTER TABLE CHANGE COLUMN COMMENT for exte…
Browse files Browse the repository at this point in the history
…rnal hive table

## What changes were proposed in this pull request?

The following query doesn't work as expected:
```
CREATE EXTERNAL TABLE ext_table(a STRING, b INT, c STRING) PARTITIONED BY (d STRING)
LOCATION 'sql/core/spark-warehouse/ext_table';
ALTER TABLE ext_table CHANGE a a STRING COMMENT "new comment";
DESC ext_table;
```
The comment of column `a` is not updated, that's because `HiveExternalCatalog.doAlterTable` ignores table schema changes. To fix the issue, we should call `doAlterTableDataSchema` instead of `doAlterTable`.

## How was this patch tested?

Updated `DDLSuite.testChangeColumn`.

Author: Xingbo Jiang <xingbo.jiang@databricks.com>

Closes apache#20696 from jiangxb1987/alterColumnComment.
  • Loading branch information
jiangxb1987 authored and cloud-fan committed Mar 7, 2018
1 parent c99fc9a commit ac76eff
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,8 @@ case class AlterTableChangeColumnCommand(
val resolver = sparkSession.sessionState.conf.resolver
DDLUtils.verifyAlterTableType(catalog, table, isView = false)

// Find the origin column from schema by column name.
val originColumn = findColumnByName(table.schema, columnName, resolver)
// Find the origin column from dataSchema by column name.
val originColumn = findColumnByName(table.dataSchema, columnName, resolver)
// Throw an AnalysisException if the column name/dataType is changed.
if (!columnEqual(originColumn, newColumn, resolver)) {
throw new AnalysisException(
Expand All @@ -324,16 +324,15 @@ case class AlterTableChangeColumnCommand(
s"'${newColumn.name}' with type '${newColumn.dataType}'")
}

val newSchema = table.schema.fields.map { field =>
val newDataSchema = table.dataSchema.fields.map { field =>
if (field.name == originColumn.name) {
// Create a new column from the origin column with the new comment.
addComment(field, newColumn.getComment)
} else {
field
}
}
val newTable = table.copy(schema = StructType(newSchema))
catalog.alterTable(newTable)
catalog.alterTableDataSchema(tableName, StructType(newDataSchema))

Seq.empty[Row]
}
Expand All @@ -345,7 +344,8 @@ case class AlterTableChangeColumnCommand(
schema.fields.collectFirst {
case field if resolver(field.name, name) => field
}.getOrElse(throw new AnalysisException(
s"Invalid column reference '$name', table schema is '${schema}'"))
s"Can't find column `$name` given table data columns " +
s"${schema.fieldNames.mkString("[`", "`, `", "`]")}"))
}

// Add the comment to a column, if comment is empty, return the original column.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ ALTER TABLE global_temp.global_temp_view CHANGE a a INT COMMENT 'this is column
-- Change column in partition spec (not supported yet)
CREATE TABLE partition_table(a INT, b STRING, c INT, d STRING) USING parquet PARTITIONED BY (c, d);
ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT;
ALTER TABLE partition_table CHANGE COLUMN c c INT COMMENT 'this is column C';

-- DROP TEST TABLE
DROP TABLE test_change;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 32
-- Number of queries: 33


-- !query 0
Expand Down Expand Up @@ -154,7 +154,7 @@ ALTER TABLE test_change CHANGE invalid_col invalid_col INT
struct<>
-- !query 15 output
org.apache.spark.sql.AnalysisException
Invalid column reference 'invalid_col', table schema is 'StructType(StructField(a,IntegerType,true), StructField(b,StringType,true), StructField(c,IntegerType,true))';
Can't find column `invalid_col` given table data columns [`a`, `b`, `c`];


-- !query 16
Expand Down Expand Up @@ -291,16 +291,25 @@ ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT


-- !query 30
DROP TABLE test_change
ALTER TABLE partition_table CHANGE COLUMN c c INT COMMENT 'this is column C'
-- !query 30 schema
struct<>
-- !query 30 output

org.apache.spark.sql.AnalysisException
Can't find column `c` given table data columns [`a`, `b`];


-- !query 31
DROP TABLE partition_table
DROP TABLE test_change
-- !query 31 schema
struct<>
-- !query 31 output



-- !query 32
DROP TABLE partition_table
-- !query 32 schema
struct<>
-- !query 32 output

Original file line number Diff line number Diff line change
Expand Up @@ -1597,6 +1597,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
// Ensure that change column will preserve other metadata fields.
sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 col1 INT COMMENT 'this is col1'")
assert(getMetadata("col1").getString("key") == "value")
assert(getMetadata("col1").getString("comment") == "this is col1")
}

test("drop build-in function") {
Expand Down

0 comments on commit ac76eff

Please sign in to comment.