diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index a7027e89e187b..89ba14bf7ff62 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3468,6 +3468,24 @@
     ],
     "sqlState" : "42805"
   },
+  "PARQUET_TYPE_ILLEGAL" : {
+    "message" : [
+      "Illegal Parquet type: <parquetType>."
+    ],
+    "sqlState" : "42846"
+  },
+  "PARQUET_TYPE_NOT_RECOGNIZED" : {
+    "message" : [
+      "Unrecognized Parquet type: <field>."
+    ],
+    "sqlState" : "42846"
+  },
+  "PARQUET_TYPE_NOT_SUPPORTED" : {
+    "message" : [
+      "Parquet type not yet supported: <parquetType>."
+    ],
+    "sqlState" : "42846"
+  },
   "PARSE_EMPTY_STATEMENT" : {
     "message" : [
       "Syntax error, unexpected empty statement."
@@ -5779,21 +5797,6 @@
       "createTableColumnTypes option column <col> not found in schema <schema>."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1172" : {
-    "message" : [
-      "Parquet type not yet supported: <parquetType>."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_1173" : {
-    "message" : [
-      "Illegal Parquet type: <parquetType>."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_1174" : {
-    "message" : [
-      "Unrecognized Parquet type: <field>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1181" : {
     "message" : [
       "Stream-stream join without equality predicate is not supported."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 73a98f9fe4be8..e399961459cfe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1938,19 +1938,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
 
   def parquetTypeUnsupportedYetError(parquetType: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1172",
+      errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
       messageParameters = Map("parquetType" -> parquetType))
   }
 
   def illegalParquetTypeError(parquetType: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1173",
+      errorClass = "PARQUET_TYPE_ILLEGAL",
       messageParameters = Map("parquetType" -> parquetType))
   }
 
   def unrecognizedParquetTypeError(field: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1174",
+      errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
       messageParameters = Map("field" -> field))
   }
 
diff --git a/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet b/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet
new file mode 100644
index 0000000000000..d315eb467a02b
Binary files /dev/null and b/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet differ
diff --git a/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet b/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet
new file mode 100644
index 0000000000000..1504c6e4b4c84
Binary files /dev/null and b/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet differ
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
index 25f6af1cc3384..074781da830fe 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
@@ -1111,10 +1111,37 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
 
   test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default nanosAsLong=false)") {
     val testDataPath = testFile("test-data/timestamp-nanos.parquet")
-    val e = intercept[AnalysisException] {
-      spark.read.parquet(testDataPath).collect()
-    }
-    assert(e.getMessage.contains("Illegal Parquet type: INT64 (TIMESTAMP(NANOS,true))."))
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.parquet(testDataPath).collect()
+      },
+      errorClass = "PARQUET_TYPE_ILLEGAL",
+      parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
+    )
+  }
+
+  test("SPARK-47261: parquet file with unsupported type") {
+    val testDataPath = testFile("test-data/interval-using-fixed-len-byte-array.parquet")
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.parquet(testDataPath).collect()
+      },
+      errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
+      parameters = Map("parquetType" -> "FIXED_LEN_BYTE_ARRAY (INTERVAL)")
+    )
+  }
+
+  test("SPARK-47261: parquet file with unrecognized parquet type") {
+    val testDataPath = testFile("test-data/group-field-with-enum-as-logical-annotation.parquet")
+    val expectedParameter = "required group my_list (ENUM) {\n  repeated group list {\n" +
+      "    optional binary element (STRING);\n  }\n}"
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.parquet(testDataPath).collect()
+      },
+      errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
+      parameters = Map("field" -> expectedParameter)
+    )
   }
 
   // =======================================================
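
A quick way to see the renamed error classes from a caller's perspective (a minimal sketch, not part of the patch: it assumes a running `SparkSession` named `spark` and reuses the `timestamp-nanos.parquet` test file referenced above; `getErrorClass` and `getMessageParameters` come from Spark's `SparkThrowable` interface):

```scala
import org.apache.spark.sql.AnalysisException

// Reading a file with an INT64 (TIMESTAMP(NANOS,true)) column should now fail
// with the named error class instead of _LEGACY_ERROR_TEMP_1173.
try {
  spark.read
    .parquet("sql/core/src/test/resources/test-data/timestamp-nanos.parquet")
    .collect()
} catch {
  case e: AnalysisException =>
    // AnalysisException implements SparkThrowable, which exposes the error class
    // and its message parameters as registered in error-conditions.json.
    assert(e.getErrorClass == "PARQUET_TYPE_ILLEGAL")
    assert(e.getMessageParameters.get("parquetType") == "INT64 (TIMESTAMP(NANOS,true))")
}
```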