Skip to content

Commit

Permalink
String dtype: disallow specifying the 'str' dtype with storage in [..] in string alias (#60661)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Jan 13, 2025
1 parent 57d2489 commit 7415aca
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2339,7 +2339,7 @@ def construct_from_string(cls, string: str) -> ArrowDtype:
)
if not string.endswith("[pyarrow]"):
raise TypeError(f"'{string}' must end with '[pyarrow]'")
if string == "string[pyarrow]":
if string in ("string[pyarrow]", "str[pyarrow]"):
# Ensure Registry.find skips ArrowDtype to use StringDtype instead
raise TypeError("string[pyarrow] should be constructed by StringDtype")
if pa_version_under10p1:
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,26 @@ def test_pandas_dtype_string_dtypes(string_storage):
assert result == pd.StringDtype(string_storage, na_value=pd.NA)


def test_pandas_dtype_string_dtype_alias_with_storage():
    """Storage suffixes are accepted on the 'string' alias but not on 'str'.

    ``pandas_dtype`` must reject ``"str[<storage>]"`` with a TypeError, while
    ``"string[<storage>]"`` keeps resolving to the NA-backed ``StringDtype``
    for that storage (or raises ImportError when pyarrow is unavailable).
    """
    # The short 'str' alias never takes a storage suffix, whatever the backend.
    for rejected_alias in ("str[python]", "str[pyarrow]"):
        with pytest.raises(TypeError, match="not understood"):
            pandas_dtype(rejected_alias)

    python_backed = pandas_dtype("string[python]")
    assert python_backed == pd.StringDtype("python", na_value=pd.NA)

    if not HAS_PYARROW:
        # Without pyarrow the alias is still recognised, but construction fails.
        with pytest.raises(
            ImportError, match="required for PyArrow backed StringArray"
        ):
            pandas_dtype("string[pyarrow]")
        return

    arrow_backed = pandas_dtype("string[pyarrow]")
    assert arrow_backed == pd.StringDtype("pyarrow", na_value=pd.NA)


@td.skip_if_installed("pyarrow")
def test_construct_from_string_without_pyarrow_installed():
# GH 57928
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/strings/test_get_dummies.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas.util._test_decorators as td

from pandas import (
ArrowDtype,
DataFrame,
Index,
MultiIndex,
Expand Down Expand Up @@ -113,15 +114,17 @@ def test_get_dummies_with_str_dtype(any_string_dtype):
# GH#47872
@td.skip_if_no("pyarrow")
def test_get_dummies_with_pa_str_dtype(any_string_dtype):
    """Check ``Series.str.get_dummies`` with an Arrow string output dtype.

    The indicator columns are requested as ``ArrowDtype(pa.string())``, so the
    expected frame holds the strings ``"true"``/``"false"`` rather than
    booleans.

    Parameters
    ----------
    any_string_dtype
        Fixture supplying the dtype of the input Series.
    """
    # Imported locally: the decorator skips the test when pyarrow is missing.
    import pyarrow as pa

    # Build the dtype explicitly instead of using the "str[pyarrow]" alias;
    # a storage suffix on the "str" alias is no longer a valid dtype string.
    pa_string_dtype = ArrowDtype(pa.string())

    s = Series(["a|b", "a|c", np.nan], dtype=any_string_dtype)
    result = s.str.get_dummies("|", dtype=pa_string_dtype)
    expected = DataFrame(
        [
            ["true", "true", "false"],
            ["true", "false", "true"],
            ["false", "false", "false"],
        ],
        columns=list("abc"),
        dtype=pa_string_dtype,
    )
    tm.assert_frame_equal(result, expected)

0 comments on commit 7415aca

Please sign in to comment.