[SPARK-50055][SQL] Add TryMakeInterval alternative
### What changes were proposed in this pull request?
Addition of a new expression, `try_make_interval`.

### Why are the changes needed?
This is split out from apache#48499 so that each PR carries its own reasoning.

### Does this PR introduce _any_ user-facing change?
Yes, a new expression is added.
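
For illustration, a minimal PySpark sketch of the new API (assumes an active `spark` session):

```python
import pyspark.sql.functions as sf

# Build a 2-years-3-months interval; on error this function returns NULL
# instead of raising.
df = spark.range(1).select(sf.try_make_interval(sf.lit(2), sf.lit(3)))
df.show()  # 2 years 3 months
```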

### How was this patch tested?
Tests added.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#48580 from mihailom-db/addTryConv-TryMakeInterval.

Authored-by: Mihailo Milosevic <mihailo.milosevic@databricks.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
mihailom-db authored and MaxGekk committed Nov 7, 2024
1 parent d9c596c commit 9858ab6
Showing 31 changed files with 675 additions and 1 deletion.
@@ -1925,6 +1925,47 @@ class PlanGenerationTestSuite
fn.make_interval()
}

functionTest("try_make_interval years months weeks days hours mins secs") {
fn.try_make_interval(
fn.col("a"),
fn.col("a"),
fn.col("a"),
fn.col("a"),
fn.col("a"),
fn.col("a"),
fn.col("b"))
}

functionTest("try_make_interval years months weeks days hours mins") {
fn.try_make_interval(
fn.col("a"),
fn.col("a"),
fn.col("a"),
fn.col("a"),
fn.col("a"),
fn.col("a"))
}

functionTest("try_make_interval years months weeks days hours") {
fn.try_make_interval(fn.col("a"), fn.col("a"), fn.col("a"), fn.col("a"), fn.col("a"))
}

functionTest("try_make_interval years months weeks days") {
fn.try_make_interval(fn.col("a"), fn.col("a"), fn.col("a"), fn.col("a"))
}

functionTest("try_make_interval years months weeks") {
fn.try_make_interval(fn.col("a"), fn.col("a"), fn.col("a"))
}

functionTest("try_make_interval years months") {
fn.try_make_interval(fn.col("a"), fn.col("a"))
}

functionTest("try_make_interval years") {
fn.try_make_interval(fn.col("a"))
}

functionTest("make_timestamp with timezone") {
fn.make_timestamp(
fn.col("a"),
2 changes: 1 addition & 1 deletion docs/sql-ref-ansi-compliance.md
@@ -383,7 +383,7 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t
- `try_make_timestamp`: identical to the function `make_timestamp`, except that it returns `NULL` result instead of throwing an exception on error.
- `try_make_timestamp_ltz`: identical to the function `make_timestamp_ltz`, except that it returns `NULL` result instead of throwing an exception on error.
- `try_make_timestamp_ntz`: identical to the function `make_timestamp_ntz`, except that it returns `NULL` result instead of throwing an exception on error.

- `try_make_interval`: identical to the function `make_interval`, except that it returns `NULL` result instead of throwing an exception on an invalid interval.

### SQL Keywords (optional, disabled by default)

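As the entry above notes, `try_make_interval` matches `make_interval` except for the NULL-on-error contract; a minimal PySpark sketch of the contrast (assumes an active Spark 4.0+ session):

```python
import pyspark.sql.functions as sf

df = spark.createDataFrame([(1, 2147483647)], ["small", "big"])

# A representable interval is built as usual...
df.select(sf.try_make_interval(df.small)).show()  # 1 years

# ...while an overflowing one yields NULL instead of an ANSI error.
df.select(sf.try_make_interval(df.big)).show()  # NULL
```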
1 change: 1 addition & 0 deletions python/docs/source/reference/pyspark.sql/functions.rst
@@ -301,6 +301,7 @@ Date and Timestamp Functions
to_unix_timestamp
to_utc_timestamp
trunc
try_make_interval
try_make_timestamp
try_make_timestamp_ltz
try_make_timestamp_ntz
25 changes: 25 additions & 0 deletions python/pyspark/sql/connect/functions/builtin.py
@@ -3714,6 +3714,31 @@ def make_dt_interval(
make_dt_interval.__doc__ = pysparkfuncs.make_dt_interval.__doc__


def try_make_interval(
years: Optional["ColumnOrName"] = None,
months: Optional["ColumnOrName"] = None,
weeks: Optional["ColumnOrName"] = None,
days: Optional["ColumnOrName"] = None,
hours: Optional["ColumnOrName"] = None,
mins: Optional["ColumnOrName"] = None,
secs: Optional["ColumnOrName"] = None,
) -> Column:
_years = lit(0) if years is None else _to_col(years)
_months = lit(0) if months is None else _to_col(months)
_weeks = lit(0) if weeks is None else _to_col(weeks)
_days = lit(0) if days is None else _to_col(days)
_hours = lit(0) if hours is None else _to_col(hours)
_mins = lit(0) if mins is None else _to_col(mins)
_secs = lit(decimal.Decimal(0)) if secs is None else _to_col(secs)

return _invoke_function_over_columns(
"try_make_interval", _years, _months, _weeks, _days, _hours, _mins, _secs
)


try_make_interval.__doc__ = pysparkfuncs.try_make_interval.__doc__


def make_interval(
years: Optional["ColumnOrName"] = None,
months: Optional["ColumnOrName"] = None,
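In the Connect implementation above, omitted arguments default to `lit(0)`, and `secs` to `lit(Decimal(0))` so the seconds slot keeps decimal precision. A sketch of the equivalence this defaulting implies:

```python
import decimal
import pyspark.sql.functions as sf

# Per the defaulting logic above, these two calls build the same expression:
a = sf.try_make_interval(sf.col("y"))
b = sf.try_make_interval(
    sf.col("y"), sf.lit(0), sf.lit(0), sf.lit(0),
    sf.lit(0), sf.lit(0), sf.lit(decimal.Decimal(0)),
)
```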
156 changes: 156 additions & 0 deletions python/pyspark/sql/functions/builtin.py
@@ -21850,6 +21850,162 @@ def make_dt_interval(
return _invoke_function_over_columns("make_dt_interval", _days, _hours, _mins, _secs)


@_try_remote_functions
def try_make_interval(
years: Optional["ColumnOrName"] = None,
months: Optional["ColumnOrName"] = None,
weeks: Optional["ColumnOrName"] = None,
days: Optional["ColumnOrName"] = None,
hours: Optional["ColumnOrName"] = None,
mins: Optional["ColumnOrName"] = None,
secs: Optional["ColumnOrName"] = None,
) -> Column:
"""
This is a special version of `make_interval` that performs the same operation, but returns a
NULL value instead of raising an error if the interval cannot be created.

.. versionadded:: 4.0.0

Parameters
----------
years : :class:`~pyspark.sql.Column` or str, optional
The number of years, positive or negative.
months : :class:`~pyspark.sql.Column` or str, optional
The number of months, positive or negative.
weeks : :class:`~pyspark.sql.Column` or str, optional
The number of weeks, positive or negative.
days : :class:`~pyspark.sql.Column` or str, optional
The number of days, positive or negative.
hours : :class:`~pyspark.sql.Column` or str, optional
The number of hours, positive or negative.
mins : :class:`~pyspark.sql.Column` or str, optional
The number of minutes, positive or negative.
secs : :class:`~pyspark.sql.Column` or str, optional
The number of seconds with the fractional part in microsecond precision.

Returns
-------
:class:`~pyspark.sql.Column`
A new column that contains an interval.

Examples
--------

Example 1: Try make interval from years, months, weeks, days, hours, mins and secs.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(
... df.year, df.month, df.week, df.day, df.hour, df.min, df.sec)
... ).show(truncate=False)
+---------------------------------------------------------------+
|try_make_interval(year, month, week, day, hour, min, sec) |
+---------------------------------------------------------------+
|100 years 11 months 8 days 12 hours 30 minutes 1.001001 seconds|
+---------------------------------------------------------------+

Example 2: Try make interval from years, months, weeks, days, hours and mins.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(
... df.year, df.month, df.week, df.day, df.hour, df.min)
... ).show(truncate=False)
+-------------------------------------------------------+
|try_make_interval(year, month, week, day, hour, min, 0)|
+-------------------------------------------------------+
|100 years 11 months 8 days 12 hours 30 minutes |
+-------------------------------------------------------+

Example 3: Try make interval from years, months, weeks, days and hours.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(
... df.year, df.month, df.week, df.day, df.hour)
... ).show(truncate=False)
+-----------------------------------------------------+
|try_make_interval(year, month, week, day, hour, 0, 0)|
+-----------------------------------------------------+
|100 years 11 months 8 days 12 hours |
+-----------------------------------------------------+

Example 4: Try make interval from years, months, weeks and days.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(df.year, df.month, df.week, df.day)).show(truncate=False)
+--------------------------------------------------+
|try_make_interval(year, month, week, day, 0, 0, 0)|
+--------------------------------------------------+
|100 years 11 months 8 days |
+--------------------------------------------------+

Example 5: Try make interval from years, months and weeks.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(df.year, df.month, df.week)).show(truncate=False)
+------------------------------------------------+
|try_make_interval(year, month, week, 0, 0, 0, 0)|
+------------------------------------------------+
|100 years 11 months 7 days |
+------------------------------------------------+

Example 6: Try make interval from years and months.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(df.year, df.month)).show(truncate=False)
+---------------------------------------------+
|try_make_interval(year, month, 0, 0, 0, 0, 0)|
+---------------------------------------------+
|100 years 11 months |
+---------------------------------------------+

Example 7: Try make interval from years.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(df.year)).show(truncate=False)
+-----------------------------------------+
|try_make_interval(year, 0, 0, 0, 0, 0, 0)|
+-----------------------------------------+
|100 years |
+-----------------------------------------+

Example 8: Try make interval from years with overflow.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[2147483647, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.try_make_interval(df.year)).show(truncate=False)
+-----------------------------------------+
|try_make_interval(year, 0, 0, 0, 0, 0, 0)|
+-----------------------------------------+
|NULL |
+-----------------------------------------+

"""
_years = lit(0) if years is None else years
_months = lit(0) if months is None else months
_weeks = lit(0) if weeks is None else weeks
_days = lit(0) if days is None else days
_hours = lit(0) if hours is None else hours
_mins = lit(0) if mins is None else mins
_secs = lit(decimal.Decimal(0)) if secs is None else secs
return _invoke_function_over_columns(
"try_make_interval", _years, _months, _weeks, _days, _hours, _mins, _secs
)


@_try_remote_functions
def make_interval(
years: Optional["ColumnOrName"] = None,
5 changes: 5 additions & 0 deletions python/pyspark/sql/tests/test_functions.py
@@ -452,6 +452,11 @@ def test_collation(self):
actual = df.select(F.collation(F.collate("name", "UNICODE"))).distinct().collect()
self.assertEqual([Row("UNICODE")], actual)

def test_try_make_interval(self):
df = self.spark.createDataFrame([(2147483647,)], ["num"])
actual = df.select(F.isnull(F.try_make_interval("num"))).collect()
self.assertEqual([Row(True)], actual)

def test_octet_length_function(self):
# SPARK-36751: add octet length api for python
df = self.spark.createDataFrame([("cat",), ("\U0001F408",)], ["cat"])
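The added test exercises the overflow path; a hedged sketch of a complementary happy-path case (hypothetical, not part of this commit), in the same style:

```python
def test_try_make_interval_valid(self):
    # A small input should yield a non-NULL interval, so isnull is False.
    df = self.spark.createDataFrame([(1,)], ["num"])
    actual = df.select(F.isnull(F.try_make_interval("num"))).collect()
    self.assertEqual([Row(False)], actual)
```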