[SPARK-47202][PYTHON][TESTS][FOLLOW-UP] Test timestamp with tzinfo in toPandas and createDataFrame with Arrow optimized

### What changes were proposed in this pull request?

This PR is a follow-up of #45301 that actually tests the change.
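
For context, a minimal sketch of the scenario the new test exercises (illustrative only, not part of the patch; it assumes a running SparkSession and uses the same timezone-aware input as the test):

```python
import datetime
from zoneinfo import ZoneInfo

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# A timezone-aware timestamp, as used in the new test case.
ts = datetime.datetime(2023, 1, 1, 0, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles"))
df = spark.createDataFrame(pd.DataFrame({"a": [ts]}))

# The behavior under test: with Arrow either enabled or disabled,
# toPandas() drops the tzinfo and returns the value as a naive local timestamp.
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", True)
print(df.toPandas())
```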

### Why are the changes needed?

To prevent a regression.

### Does this PR introduce _any_ user-facing change?

No, test-only.

### How was this patch tested?

Manually ran the tests.
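
For reference, one way to run the new tests locally by name via unittest is sketched below; the test class names (`ArrowTests`, `ArrowParityTests`) are assumptions and may differ from the actual classes in the test modules.

```python
# A minimal sketch, assuming a working PySpark dev environment and that the new
# tests live in classes named ArrowTests / ArrowParityTests (names are assumptions).
import unittest

suite = unittest.defaultTestLoader.loadTestsFromNames(
    [
        "pyspark.sql.tests.test_arrow.ArrowTests.test_toPandas_timestmap_tzinfo",
        "pyspark.sql.tests.connect.test_parity_arrow.ArrowParityTests.test_toPandas_timestmap_tzinfo",
    ]
)
unittest.TextTestRunner(verbosity=2).run(suite)
```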

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #45308 from HyukjinKwon/SPARK-47202-followup.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
(cherry picked from commit 721c2a4)
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
HyukjinKwon committed Feb 28, 2024
1 parent c0a4416 commit e6f3dd9
Showing 2 changed files with 30 additions and 0 deletions.
python/pyspark/sql/tests/connect/test_parity_arrow.py (3 additions, 0 deletions)
@@ -136,6 +136,9 @@ def test_createDataFrame_nested_timestamp(self):
     def test_toPandas_nested_timestamp(self):
         self.check_toPandas_nested_timestamp(True)
 
+    def test_toPandas_timestmap_tzinfo(self):
+        self.check_toPandas_timestmap_tzinfo(True)
+
     def test_createDataFrame_udt(self):
         self.check_createDataFrame_udt(True)
 
python/pyspark/sql/tests/test_arrow.py (27 additions, 0 deletions)
@@ -18,12 +18,14 @@
 import datetime
 import os
 import threading
+import calendar
 import time
 import unittest
 import warnings
 from distutils.version import LooseVersion
 from typing import cast
 from collections import namedtuple
+from zoneinfo import ZoneInfo
 
 from pyspark import SparkContext, SparkConf
 from pyspark.sql import Row, SparkSession
@@ -1090,6 +1092,31 @@ def check_createDataFrame_nested_timestamp(self, arrow_enabled):
 
         self.assertEqual(df.first(), expected)
 
+    def test_toPandas_timestmap_tzinfo(self):
+        for arrow_enabled in [True, False]:
+            with self.subTest(arrow_enabled=arrow_enabled):
+                self.check_toPandas_timestmap_tzinfo(arrow_enabled)
+
+    def check_toPandas_timestmap_tzinfo(self, arrow_enabled):
+        # SPARK-47202: Test timestamp with tzinfo in toPandas and createDataFrame
+        ts_tzinfo = datetime.datetime(2023, 1, 1, 0, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles"))
+        data = pd.DataFrame({"a": [ts_tzinfo]})
+        df = self.spark.createDataFrame(data)
+
+        with self.sql_conf(
+            {
+                "spark.sql.execution.arrow.pyspark.enabled": arrow_enabled,
+            }
+        ):
+            pdf = df.toPandas()
+
+        expected = pd.DataFrame(
+            # Spark unsets tzinfo and converts them to localtimes.
+            {"a": [datetime.datetime.fromtimestamp(calendar.timegm(ts_tzinfo.utctimetuple()))]}
+        )
+
+        assert_frame_equal(pdf, expected)
+
     def test_toPandas_nested_timestamp(self):
         for arrow_enabled in [True, False]:
             with self.subTest(arrow_enabled=arrow_enabled):
