[SPARK-49773][SQL] Uncaught Java exception from make_timestamp() with bad timezone

### What changes were proposed in this pull request?

This PR proposes to fix the uncaught Java exception thrown by `make_timestamp()` when it is given a bad timezone.

### Why are the changes needed?

To improve the error message

### Does this PR introduce _any_ user-facing change?

No API changes, but the user-facing error message is changed:

**Before**
```
spark-sql (default)> select make_timestamp(1, 2, 28, 23, 1, 1, -100);
Invalid ID for ZoneOffset, invalid format: -100
```

**After**
```
spark-sql (default)> select make_timestamp(1, 2, 28, 23, 1, 1, -100);
[INVALID_TIMEZONE] The timezone: -100 is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm' or '(+|-)HH:mm:ss', e.g '-08' , '+01:00' or '-13:33:33', and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'. SQLSTATE: 22009
```
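
For illustration only (not introduced by this PR), the failure now surfaces as a `SparkDateTimeException` carrying the `INVALID_TIMEZONE` error condition, so callers can match on the condition instead of parsing the raw `java.time` message. A minimal sketch, assuming an active `SparkSession` named `spark`:

```
import org.apache.spark.SparkDateTimeException

// Hypothetical caller-side handling; `spark` is assumed to be an active SparkSession.
try {
  spark.sql("select make_timestamp(1, 2, 28, 23, 1, 1, -100)").collect()
} catch {
  case e: SparkDateTimeException if e.getErrorClass == "INVALID_TIMEZONE" =>
    // The offending value is available as a message parameter, not only in the text.
    println(s"bad timezone: ${e.getMessageParameters.get("timeZone")}")
}
```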

### How was this patch tested?

CI

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#48260 from itholic/SPARK-49773.

Lead-authored-by: Haejoon Lee <haejoon.lee@databricks.com>
Co-authored-by: Haejoon Lee <haejoon@apache.org>
Signed-off-by: Haejoon Lee <haejoon.lee@databricks.com>
2 people authored and himadripal committed Oct 19, 2024
1 parent 51733d9 commit 4273008
Showing 5 changed files with 57 additions and 9 deletions.
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
@@ -3113,6 +3113,12 @@
],
"sqlState" : "42K0F"
},
"INVALID_TIMEZONE" : {
"message" : [
"The timezone: <timeZone> is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm’ or '(+|-)HH:mm:ss', e.g '-08' , '+01:00' or '-13:33:33', and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'."
],
"sqlState" : "22009"
},
"INVALID_TIME_TRAVEL_SPEC" : {
"message" : [
"Cannot specify both version and timestamp when time travelling the table."
23 changes: 20 additions & 3 deletions common/utils/src/main/scala/org/apache/spark/SparkException.scala
@@ -306,8 +306,9 @@ private[spark] class SparkDateTimeException private(
message: String,
errorClass: Option[String],
messageParameters: Map[String, String],
context: Array[QueryContext])
extends DateTimeException(message) with SparkThrowable {
context: Array[QueryContext],
cause: Option[Throwable])
extends DateTimeException(message, cause.orNull) with SparkThrowable {

def this(
errorClass: String,
@@ -318,7 +319,23 @@
SparkThrowableHelper.getMessage(errorClass, messageParameters, summary),
Option(errorClass),
messageParameters,
context
context,
cause = None
)
}

def this(
errorClass: String,
messageParameters: Map[String, String],
context: Array[QueryContext],
summary: String,
cause: Option[Throwable]) = {
this(
SparkThrowableHelper.getMessage(errorClass, messageParameters, summary),
Option(errorClass),
messageParameters,
context,
cause.orElse(None)
)
}
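
A minimal sketch (not part of the diff) of what the new overload enables inside Spark's own packages, where the `private[spark]` class is visible; the named arguments mirror the constructor parameters added above:

```
import java.time.DateTimeException

import org.apache.spark.SparkDateTimeException

// The original java.time failure is now kept as the cause instead of being dropped.
val original = new DateTimeException("Invalid ID for ZoneOffset, invalid format: -100")
val wrapped = new SparkDateTimeException(
  errorClass = "INVALID_TIMEZONE",
  messageParameters = Map("timeZone" -> "-100"),
  context = Array.empty,
  summary = "",
  cause = Some(original))
assert(wrapped.getCause eq original)
```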

SparkDateTimeUtils.scala
@@ -41,12 +41,16 @@ trait SparkDateTimeUtils {
final val singleMinuteTz = Pattern.compile("(\\+|\\-)(\\d\\d):(\\d)$")

def getZoneId(timeZoneId: String): ZoneId = {
// To support the (+|-)h:mm format because it was supported before Spark 3.0.
var formattedZoneId = singleHourTz.matcher(timeZoneId).replaceFirst("$10$2:")
// To support the (+|-)hh:m format because it was supported before Spark 3.0.
formattedZoneId = singleMinuteTz.matcher(formattedZoneId).replaceFirst("$1$2:0$3")

ZoneId.of(formattedZoneId, ZoneId.SHORT_IDS)
try {
// To support the (+|-)h:mm format because it was supported before Spark 3.0.
var formattedZoneId = singleHourTz.matcher(timeZoneId).replaceFirst("$10$2:")
// To support the (+|-)hh:m format because it was supported before Spark 3.0.
formattedZoneId = singleMinuteTz.matcher(formattedZoneId).replaceFirst("$1$2:0$3")
ZoneId.of(formattedZoneId, ZoneId.SHORT_IDS)
} catch {
case e: java.time.DateTimeException =>
throw ExecutionErrors.zoneOffsetError(timeZoneId, e)
}
}

def getTimeZone(timeZoneId: String): TimeZone = TimeZone.getTimeZone(getZoneId(timeZoneId))
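
A rough behavioral sketch of the change (assumption: the method is reached through `DateTimeUtils`, which mixes in `SparkDateTimeUtils`; the inputs are illustrative):

```
import org.apache.spark.sql.catalyst.util.DateTimeUtils

DateTimeUtils.getZoneId("+1:30")               // legacy single-digit hour, normalized to +01:30
DateTimeUtils.getZoneId("America/Los_Angeles") // region-based IDs pass straight to ZoneId.of
DateTimeUtils.getZoneId("-100")                // before: a raw java.time.DateTimeException leaked out
                                               // after: SparkDateTimeException with INVALID_TIMEZONE
```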
ExecutionErrors.scala
@@ -238,6 +238,17 @@ private[sql] trait ExecutionErrors extends DataTypeErrorsBase {
"encoderType" -> encoder.getClass.getName,
"docroot" -> SparkBuildInfo.spark_doc_root))
}

def zoneOffsetError(
timeZone: String,
e: java.time.DateTimeException): SparkDateTimeException = {
new SparkDateTimeException(
errorClass = "INVALID_TIMEZONE",
messageParameters = Map("timeZone" -> timeZone),
context = Array.empty,
summary = "",
cause = Some(e))
}
}

private[sql] object ExecutionErrors extends ExecutionErrors
QueryExecutionAnsiErrorsSuite.scala
@@ -391,4 +391,14 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest
}
}
}

test("SPARK-49773: INVALID_TIMEZONE for bad timezone") {
checkError(
exception = intercept[SparkDateTimeException] {
sql("select make_timestamp(1, 2, 28, 23, 1, 1, -100)").collect()
},
condition = "INVALID_TIMEZONE",
parameters = Map("timeZone" -> "-100")
)
}
}
