Skip to content

Commit

Permalink
API: Disallow sets as index and columns argument in DataFrame constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
Dr-Irv authored and noatamir committed Nov 9, 2022
1 parent d4efd3d commit 66318df
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 20 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ Other API changes
- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. (:issue:`29102`)
- Operations with :class:`Timestamp` or :class:`Timedelta` that would previously raise ``OverflowError`` instead raise ``OutOfBoundsDatetime`` or ``OutOfBoundsTimedelta`` where appropriate (:issue:`47268`)
- When :func:`read_sas` previously returned ``None``, it now returns an empty :class:`DataFrame` (:issue:`47410`)
-
- The :class:`DataFrame` constructor now raises ``ValueError`` if the ``index`` or ``columns`` argument is a set (:issue:`47215`)

.. ---------------------------------------------------------------------------
.. _whatsnew_150.deprecations:
Expand Down
3 changes: 1 addition & 2 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
TYPE_CHECKING,
Any,
Callable,
Collection,
Dict,
Hashable,
Iterator,
Expand Down Expand Up @@ -115,7 +114,7 @@
Ordered = Optional[bool]
JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
Frequency = Union[str, "DateOffset"]
Axes = Collection[Any]
Axes = Union[AnyArrayLike, List, range]

RandomState = Union[
int,
Expand Down
21 changes: 9 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,12 @@ def __init__(

manager = get_option("mode.data_manager")

# GH47215
if index is not None and isinstance(index, set):
raise ValueError("index cannot be a set")
if columns is not None and isinstance(columns, set):
raise ValueError("columns cannot be a set")

if copy is None:
if isinstance(data, dict):
# retain pre-GH#38939 default behavior
Expand Down Expand Up @@ -729,10 +735,7 @@ def __init__(
if not isinstance(data, np.ndarray) and treat_as_nested(data):
# exclude ndarray as we may have cast it a few lines above
if columns is not None:
# error: Argument 1 to "ensure_index" has incompatible type
# "Collection[Any]"; expected "Union[Union[Union[ExtensionArray,
# ndarray], Index, Series], Sequence[Any]]"
columns = ensure_index(columns) # type: ignore[arg-type]
columns = ensure_index(columns)
arrays, columns, index = nested_data_to_arrays(
# error: Argument 3 to "nested_data_to_arrays" has incompatible
# type "Optional[Collection[Any]]"; expected "Optional[Index]"
Expand Down Expand Up @@ -770,14 +773,8 @@ def __init__(
if index is None or columns is None:
raise ValueError("DataFrame constructor not properly called!")

# Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
# expected "Union[Union[Union[ExtensionArray, ndarray],
# Index, Series], Sequence[Any]]"
index = ensure_index(index) # type: ignore[arg-type]
# Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
# expected "Union[Union[Union[ExtensionArray, ndarray],
# Index, Series], Sequence[Any]]"
columns = ensure_index(columns) # type: ignore[arg-type]
index = ensure_index(index)
columns = ensure_index(columns)

if not dtype:
dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t):
obj.set_axis(labels, axis=axis, inplace=True)
return obj

def _set_axis(self, axis: int, labels: AnyArrayLike | Sequence) -> None:
def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None:
labels = ensure_index(labels)
self._mgr.set_axis(axis, labels)
self._clear_item_cache()
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
tz_compare,
)
from pandas._typing import (
AnyArrayLike,
ArrayLike,
Axes,
Dtype,
DtypeObj,
F,
Expand Down Expand Up @@ -7281,7 +7281,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
return MultiIndex.from_arrays(sequences, names=names)


def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Index:
def ensure_index(index_like: Axes, copy: bool = False) -> Index:
"""
Ensure that we have an index from some index-like object.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]:
def _can_hold_na(self) -> bool:
return self._mgr._can_hold_na

def _set_axis(self, axis: int, labels: AnyArrayLike | Sequence) -> None:
def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None:
"""
Override generic, we want to set the _typ here.
Expand Down
5 changes: 4 additions & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from pandas._libs.tslibs.nattype import NaTType
from pandas._typing import (
ArrayLike,
Axes,
ColspaceArgType,
ColspaceType,
CompressionOptions,
Expand Down Expand Up @@ -689,7 +690,9 @@ def _initialize_justify(self, justify: str | None) -> str:

def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
if columns is not None:
cols = ensure_index(columns)
# GH 47231 - columns doesn't have to be `Sequence[str]`; cast to `Axes`
# so the call to `ensure_index` type-checks. Will fix in later PR
cols = ensure_index(cast(Axes, columns))
self.frame = self.frame[cols]
return cols
else:
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3001,6 +3001,14 @@ def test_construction_from_ndarray_with_eadtype_mismatched_columns(self):
with pytest.raises(ValueError, match=msg):
DataFrame(arr2, columns=["foo", "bar"])

def test_columns_indexes_raise_on_sets(self):
    """Passing a set as ``index`` or ``columns`` must raise ValueError (GH 47215)."""
    rows = [[1, 2, 3], [4, 5, 6]]
    # Each case pairs the offending keyword argument with the expected message;
    # ``index`` is checked first, then ``columns``.
    cases = [
        ({"index": {"a", "b"}}, "index cannot be a set"),
        ({"columns": {"a", "b", "c"}}, "columns cannot be a set"),
    ]
    for kwargs, expected_msg in cases:
        with pytest.raises(ValueError, match=expected_msg):
            DataFrame(rows, **kwargs)


def get1(obj): # TODO: make a helper in tm?
if isinstance(obj, Series):
Expand Down

0 comments on commit 66318df

Please sign in to comment.