-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-48994][SQL][PYTHON][VARIANT] Add support for interval types in the Variant Spec #47473
Changes from all commits
51f8fb0
be0bfc2
2e0fc07
f25b2f6
b7fe993
59d8572
f073b75
e727fb1
fa1b481
2ce9273
9ab3ccb
9aecb2c
73cfde8
d3e4193
417e419
57660d6
52fed53
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.util; | ||
|
||
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Locale;

import org.apache.spark.SparkException;
|
||
// Replicating code from SparkIntervalUtils so code in the 'common' space can work with | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why replicating? I think other modules depend on There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. However, I believe that should be a different PR as it would be a big change in itself. For the purposes of this PR, these functions only support ANSIStyle while the functions on SQL also expect Hive style sometimes. |
||
// day-time intervals. | ||
public class DayTimeIntervalUtils { | ||
private static byte DAY = 0; | ||
private static byte HOUR = 1; | ||
private static byte MINUTE = 2; | ||
private static byte SECOND = 3; | ||
private static long HOURS_PER_DAY = 24; | ||
private static long MINUTES_PER_HOUR = 60; | ||
private static long SECONDS_PER_MINUTE = 60; | ||
private static long MILLIS_PER_SECOND = 1000; | ||
private static long MICROS_PER_MILLIS = 1000; | ||
private static long MICROS_PER_SECOND = MICROS_PER_MILLIS * MILLIS_PER_SECOND; | ||
private static long MICROS_PER_MINUTE = SECONDS_PER_MINUTE * MICROS_PER_SECOND; | ||
private static long MICROS_PER_HOUR = MINUTES_PER_HOUR * MICROS_PER_MINUTE; | ||
private static long MICROS_PER_DAY = HOURS_PER_DAY * MICROS_PER_HOUR; | ||
private static long MAX_DAY = Long.MAX_VALUE / MICROS_PER_DAY; | ||
private static long MAX_HOUR = Long.MAX_VALUE / MICROS_PER_HOUR; | ||
private static long MAX_MINUTE = Long.MAX_VALUE / MICROS_PER_MINUTE; | ||
private static long MAX_SECOND = Long.MAX_VALUE / MICROS_PER_SECOND; | ||
|
||
public static String fieldToString(byte field) throws SparkException { | ||
if (field == DAY) { | ||
return "DAY"; | ||
} else if (field == HOUR) { | ||
return "HOUR"; | ||
} else if (field == MINUTE) { | ||
return "MINUTE"; | ||
} else if (field == SECOND) { | ||
return "SECOND"; | ||
} else { | ||
throw new SparkException("Invalid field in day-time interval: " + field + | ||
". Supported fields are: DAY, HOUR, MINUTE, SECOND"); | ||
} | ||
} | ||
|
||
// Used to convert microseconds representing a day-time interval with given start and end fields | ||
// to its ANSI SQL string representation. Throws a SparkException if startField or endField are | ||
// out of bounds. | ||
public static String toDayTimeIntervalANSIString(long micros, byte startField, byte endField) | ||
throws SparkException { | ||
String sign = ""; | ||
long rest = micros; | ||
try { | ||
String from = fieldToString(startField).toUpperCase(); | ||
String to = fieldToString(endField).toUpperCase(); | ||
String prefix = "INTERVAL '"; | ||
String postfix = startField == endField ? "' " + from : "' " + from + " TO " + to; | ||
if (micros < 0) { | ||
if (micros == Long.MIN_VALUE) { | ||
// Especial handling of minimum `Long` value because negate op overflows `Long`. | ||
// seconds = 106751991 * (24 * 60 * 60) + 4 * 60 * 60 + 54 = 9223372036854 | ||
// microseconds = -9223372036854000000L-775808 == Long.MinValue | ||
String baseStr = "-106751991 04:00:54.775808000"; | ||
String firstStr = "-" + (startField == DAY ? Long.toString(MAX_DAY) : | ||
(startField == HOUR ? Long.toString(MAX_HOUR) : | ||
(startField == MINUTE ? Long.toString(MAX_MINUTE) : | ||
Long.toString(MAX_SECOND) + ".775808"))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if (startField == endField) { | ||
return prefix + firstStr + postfix; | ||
} else { | ||
int substrStart = startField == DAY ? 10 : (startField == HOUR ? 13 : 16); | ||
int substrEnd = endField == HOUR ? 13 : (endField == MINUTE ? 16 : 26); | ||
return prefix + firstStr + baseStr.substring(substrStart, substrEnd) + postfix; | ||
} | ||
} else { | ||
sign = "-"; | ||
rest = -rest; | ||
} | ||
} | ||
StringBuilder formatBuilder = new StringBuilder(sign); | ||
ArrayList<Long> formatArgs = new ArrayList<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if (startField == DAY) { | ||
formatBuilder.append(rest / MICROS_PER_DAY); | ||
rest %= MICROS_PER_DAY; | ||
} else if (startField == HOUR) { | ||
formatBuilder.append("%02d"); | ||
formatArgs.add(rest / MICROS_PER_HOUR); | ||
rest %= MICROS_PER_HOUR; | ||
} else if (startField == MINUTE) { | ||
formatBuilder.append("%02d"); | ||
formatArgs.add(rest / MICROS_PER_MINUTE); | ||
rest %= MICROS_PER_MINUTE; | ||
} else if (startField == SECOND) { | ||
String leadZero = rest < 10 * MICROS_PER_SECOND ? "0" : ""; | ||
formatBuilder.append(leadZero + BigDecimal.valueOf(rest, 6) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should use chained calls, like formatBuilder.append(leadZero).append(BigDecimal.valueOf(rest, 6).stripTrailingZeros().toPlainString()); otherwise |
||
.stripTrailingZeros().toPlainString()); | ||
} | ||
|
||
if (startField < HOUR && HOUR <= endField) { | ||
formatBuilder.append(" %02d"); | ||
formatArgs.add(rest / MICROS_PER_HOUR); | ||
rest %= MICROS_PER_HOUR; | ||
} | ||
if (startField < MINUTE && MINUTE <= endField) { | ||
formatBuilder.append(":%02d"); | ||
formatArgs.add(rest / MICROS_PER_MINUTE); | ||
rest %= MICROS_PER_MINUTE; | ||
} | ||
if (startField < SECOND && SECOND <= endField) { | ||
String leadZero = rest < 10 * MICROS_PER_SECOND ? "0" : ""; | ||
formatBuilder.append(":" + leadZero + BigDecimal.valueOf(rest, 6) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
.stripTrailingZeros().toPlainString()); | ||
} | ||
return prefix + String.format(formatBuilder.toString(), formatArgs.toArray()) + postfix; | ||
} catch (SparkException e) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
throw e; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.util; | ||
|
||
// Replicating code from SparkIntervalUtils so code in the 'common' space can work with | ||
// year-month intervals. | ||
/**
 * Helpers for rendering year-month intervals (stored as a signed number of months) as
 * ANSI SQL interval literals such as {@code INTERVAL '1-2' YEAR TO MONTH}.
 *
 * <p>Fields are identified by the byte codes YEAR = 0 and MONTH = 1.
 */
public class YearMonthIntervalUtils {
  private static final byte YEAR = 0;
  private static final byte MONTH = 1;
  private static final int MONTHS_PER_YEAR = 12;

  /**
   * Converts months representing a year-month interval with the given start and end fields
   * to its ANSI SQL string representation.
   *
   * <p>When both fields are equal, the result is a single-field literal: YEAR if the field is
   * the YEAR code, MONTH for any other code. When they differ, the result is a
   * {@code YEAR TO MONTH} literal.
   *
   * @param months the interval value in months; may be negative
   * @param startField code of the leading field of the interval
   * @param endField code of the trailing field of the interval
   * @return the literal, e.g. {@code INTERVAL '-14' MONTH} or {@code INTERVAL '1' YEAR}
   */
  public static String toYearMonthIntervalANSIString(int months, byte startField, byte endField) {
    StringBuilder literal = new StringBuilder("INTERVAL '");
    boolean singleField = startField == endField;

    if (singleField && startField != YEAR) {
      // A MONTH-only interval prints the raw signed month count.
      return literal.append(months).append("' MONTH").toString();
    }

    // Widen to long before taking the magnitude so Integer.MIN_VALUE does not overflow.
    long absMonths = months;
    if (months < 0) {
      literal.append('-');
      absMonths = -absMonths;
    }
    literal.append(absMonths / MONTHS_PER_YEAR);

    if (singleField) {
      return literal.append("' YEAR").toString();
    }
    return literal
      .append('-')
      .append(absMonths % MONTHS_PER_YEAR)
      .append("' YEAR TO MONTH")
      .toString();
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The import order rule in the newly added Java file should be consistent with that of the Scala files.