[SPARK-47202][PYTHON][TESTS][FOLLOW-UP] Test timestamp with tzinfo in toPandas and createDataFrame with Arrow optimized

### What changes were proposed in this pull request?

This PR is a follow-up of apache#45301 that actually tests the change.
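
For context, a minimal sketch of the scenario the new test exercises (not part of the patch; it assumes an active `SparkSession` bound to `spark` and that pandas is installed):

```python
# Hedged sketch of the behavior under test: a tz-aware timestamp round-tripped
# through createDataFrame and toPandas. Assumes an existing SparkSession `spark`.
import datetime
from zoneinfo import ZoneInfo

import pandas as pd

ts = datetime.datetime(2023, 1, 1, tzinfo=ZoneInfo("America/Los_Angeles"))
df = spark.createDataFrame(pd.DataFrame({"a": [ts]}))

# Whether spark.sql.execution.arrow.pyspark.enabled is true or false,
# toPandas() is expected to return the same naive (tzinfo-less) local-time value.
print(df.toPandas()["a"][0])
```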

### Why are the changes needed?

To prevent a regression.

### Does this PR introduce _any_ user-facing change?

No, test-only.

### How was this patch tested?

Manually ran the tests.
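
One way to run the new case locally with plain `unittest` (a sketch; it assumes a working PySpark dev environment with pandas and PyArrow installed, and that the concrete test class in `test_arrow.py` is named `ArrowTests`):

```python
# Hypothetical local runner for the new test case; ArrowTests is assumed to be
# the concrete TestCase subclass defined in pyspark/sql/tests/test_arrow.py.
import unittest

from pyspark.sql.tests.test_arrow import ArrowTests

suite = unittest.TestSuite([ArrowTests("test_toPandas_timestmap_tzinfo")])
unittest.TextTestRunner(verbosity=2).run(suite)
```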

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#45308 from HyukjinKwon/SPARK-47202-followup.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
HyukjinKwon authored and ericm-db committed Mar 5, 2024
1 parent 5a3bccd commit c66c5d2
Showing 2 changed files with 30 additions and 0 deletions.
3 changes: 3 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_arrow.py
@@ -129,6 +129,9 @@ def test_createDataFrame_nested_timestamp(self):
    def test_toPandas_nested_timestamp(self):
        self.check_toPandas_nested_timestamp(True)

    def test_toPandas_timestmap_tzinfo(self):
        self.check_toPandas_timestmap_tzinfo(True)

    def test_createDataFrame_udt(self):
        self.check_createDataFrame_udt(True)

27 changes: 27 additions & 0 deletions python/pyspark/sql/tests/test_arrow.py
@@ -18,10 +18,12 @@
import datetime
import os
import threading
import calendar
import time
import unittest
from typing import cast
from collections import namedtuple
from zoneinfo import ZoneInfo

from pyspark import SparkContext, SparkConf
from pyspark.sql import Row, SparkSession
@@ -996,6 +998,31 @@ def check_createDataFrame_nested_timestamp(self, arrow_enabled):

        self.assertEqual(df.first(), expected)

    def test_toPandas_timestmap_tzinfo(self):
        for arrow_enabled in [True, False]:
            with self.subTest(arrow_enabled=arrow_enabled):
                self.check_toPandas_timestmap_tzinfo(arrow_enabled)

    def check_toPandas_timestmap_tzinfo(self, arrow_enabled):
        # SPARK-47202: Test timestamp with tzinfo in toPandas and createDataFrame
        ts_tzinfo = datetime.datetime(2023, 1, 1, 0, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles"))
        data = pd.DataFrame({"a": [ts_tzinfo]})
        df = self.spark.createDataFrame(data)

        with self.sql_conf(
            {
                "spark.sql.execution.arrow.pyspark.enabled": arrow_enabled,
            }
        ):
            pdf = df.toPandas()

        expected = pd.DataFrame(
            # Spark unsets tzinfo and converts them to localtimes.
            {"a": [datetime.datetime.fromtimestamp(calendar.timegm(ts_tzinfo.utctimetuple()))]}
        )

        assert_frame_equal(pdf, expected)

    def test_toPandas_nested_timestamp(self):
        for arrow_enabled in [True, False]:
            with self.subTest(arrow_enabled=arrow_enabled):
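
As a side note on the expected value in `check_toPandas_timestmap_tzinfo` above: `calendar.timegm(ts.utctimetuple())` yields the UTC epoch seconds of the tz-aware timestamp, and `datetime.datetime.fromtimestamp(...)` renders those seconds in the machine's local zone without tzinfo. A small illustration (not part of the patch), assuming the machine's local zone is UTC:

```python
# Illustration of the expected-value arithmetic used in the test above.
import calendar
import datetime
from zoneinfo import ZoneInfo

ts = datetime.datetime(2023, 1, 1, tzinfo=ZoneInfo("America/Los_Angeles"))
epoch = calendar.timegm(ts.utctimetuple())     # 2023-01-01 00:00 PST -> 08:00 UTC
print(datetime.datetime.fromtimestamp(epoch))  # "2023-01-01 08:00:00" if the local zone is UTC
```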
