Skip to content

Commit

Permalink
clib.converison._to_numpy: Add tests for pandas.Series with datetime …
Browse files Browse the repository at this point in the history
…dtypes (#3670)
  • Loading branch information
seisman authored Jan 9, 2025
1 parent 0bf733f commit 9e912ba
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 0 deletions.
11 changes: 11 additions & 0 deletions pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,17 @@ def _to_numpy(data: Any) -> np.ndarray:
numpy_dtype = np.float64
data = data.to_numpy(na_value=np.nan)

# Deal with timezone-aware datetime dtypes.
if isinstance(dtype, pd.DatetimeTZDtype): # pandas.DatetimeTZDtype
numpy_dtype = getattr(dtype, "base", None)
elif isinstance(dtype, pd.ArrowDtype) and hasattr(dtype.pyarrow_dtype, "tz"):
# pd.ArrowDtype[pa.Timestamp]
numpy_dtype = getattr(dtype, "numpy_dtype", None)
# TODO(pandas>=2.1): Remove the workaround for pandas<2.1.
if Version(pd.__version__) < Version("2.1"):
# In pandas 2.0, dtype.numpy_type is dtype("O").
numpy_dtype = np.dtype(f"M8[{dtype.pyarrow_dtype.unit}]") # type: ignore[assignment, attr-defined]

array = np.ascontiguousarray(data, dtype=numpy_dtype)

# Check if a np.object_ array can be converted to np.str_.
Expand Down
107 changes: 107 additions & 0 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,113 @@ def test_to_numpy_pandas_date(dtype, expected_dtype):
)


pandas_old_version = pytest.mark.xfail(
condition=Version(pd.__version__) < Version("2.1"),
reason="pandas 2.0 bug reported in https://github.com/pandas-dev/pandas/issues/52705",
)


@pytest.mark.parametrize(
("dtype", "expected_dtype"),
[
# NumPy datetime64 types. Only unit 's'/'ms'/'us'/'ns' are supported.
pytest.param("datetime64[s]", "datetime64[s]", id="datetime64[s]"),
pytest.param("datetime64[ms]", "datetime64[ms]", id="datetime64[ms]"),
pytest.param("datetime64[us]", "datetime64[us]", id="datetime64[us]"),
pytest.param("datetime64[ns]", "datetime64[ns]", id="datetime64[ns]"),
# pandas.DatetimeTZDtype can be given in two ways [tz is required]:
# 1. pandas.DatetimeTZDtype(unit, tz)
# 2. String aliases: "datetime64[unit, tz]"
pytest.param(
"datetime64[s, UTC]",
"datetime64[s]",
id="datetime64[s, tz=UTC]",
marks=pandas_old_version,
),
pytest.param(
"datetime64[s, America/New_York]",
"datetime64[s]",
id="datetime64[s, tz=America/New_York]",
marks=pandas_old_version,
),
pytest.param(
"datetime64[s, +07:30]",
"datetime64[s]",
id="datetime64[s, +07:30]",
marks=pandas_old_version,
),
# PyArrow timestamp types can be given in two ways [tz is optional]:
# 1. pd.ArrowDtype(pyarrow.Timestamp(unit, tz=tz))
# 2. String aliases: "timestamp[unit, tz][pyarrow]"
pytest.param(
"timestamp[s][pyarrow]",
"datetime64[s]",
id="timestamp[s][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[ms][pyarrow]",
"datetime64[ms]",
id="timestamp[ms][pyarrow]",
marks=[skip_if_no(package="pyarrow"), pandas_old_version],
),
pytest.param(
"timestamp[us][pyarrow]",
"datetime64[us]",
id="timestamp[us][pyarrow]",
marks=[skip_if_no(package="pyarrow"), pandas_old_version],
),
pytest.param(
"timestamp[ns][pyarrow]",
"datetime64[ns]",
id="timestamp[ns][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[s, UTC][pyarrow]",
"datetime64[s]",
id="timestamp[s, UTC][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[s, America/New_York][pyarrow]",
"datetime64[s]",
id="timestamp[s, America/New_York][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[s, +08:00][pyarrow]",
"datetime64[s]",
id="timestamp[s, +08:00][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
],
)
def test_to_numpy_pandas_datetime(dtype, expected_dtype):
"""
Test the _to_numpy function with pandas.Series of datetime types.
"""
series = pd.Series(
[pd.Timestamp("2024-01-02T03:04:05"), pd.Timestamp("2024-01-02T03:04:06")],
dtype=dtype,
)
result = _to_numpy(series)
_check_result(result, np.datetime64)
assert result.dtype == expected_dtype

# Convert to UTC if the dtype is timezone-aware
if "," in str(dtype): # A hacky way to decide if the dtype is timezone-aware.
# TODO(pandas>=2.1): Simplify the if-else statement.
if Version(pd.__version__) < Version("2.1") and dtype.startswith("timestamp"):
# pandas 2.0 doesn't have the dt.tz_convert method for pyarrow.Timestamp.
series = pd.to_datetime(series, utc=True)
else:
series = series.dt.tz_convert("UTC")
# Remove time zone information and preserve local time.
expected_series = series.dt.tz_localize(tz=None)
npt.assert_array_equal(result, np.array(expected_series, dtype=expected_dtype))


########################################################################################
# Test the _to_numpy function with PyArrow arrays.
#
Expand Down

0 comments on commit 9e912ba

Please sign in to comment.