Skip to content

Commit

Permalink
[SPARK-40500][PS] Deprecate iteritems in DataFrame and Series
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

1. Use `pd.items` instead of `pd.iteritems`
2. Deprecate `ps.iteritems`

### Why are the changes needed?
`pd.iteritems` has been deprecated since pandas 1.5

before:
```
In [4]: import pyspark.pandas as ps

In [5]: ps.Series([3, 4, 1, 1, 5])
/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/internal.py:1573: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  fields = [
/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/pandas/conversion.py:486: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for column, series in pdf.iteritems():

0    3
1    4
2    1
3    1
4    5
dtype: int64
```

after:
```
In [1]: import pyspark.pandas as ps

In [2]: ps.Series([3, 4, 1, 1, 5])

0    3
1    4
2    1
3    1
4    5
dtype: int64
```

### Does this PR introduce _any_ user-facing change?
Eliminate `iteritems` warnings

### How was this patch tested?
existing UT

Closes #37947 from zhengruifeng/ps_iteritems_to_items.

Lead-authored-by: Ruifeng Zheng <ruifengz@apache.org>
Co-authored-by: Hyukjin Kwon <gurwls223@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
zhengruifeng and HyukjinKwon committed Sep 21, 2022
1 parent f89b847 commit 44573a2
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 14 deletions.
15 changes: 11 additions & 4 deletions python/pyspark/pandas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1850,7 +1850,7 @@ def corrwith(
sser.name = None
return sser

def iteritems(self) -> Iterator[Tuple[Name, "Series"]]:
def items(self) -> Iterator[Tuple[Name, "Series"]]:
"""
Iterator over (column name, Series) pairs.
Expand Down Expand Up @@ -2054,9 +2054,16 @@ def extract_kv_from_spark_row(row: Row) -> Tuple[Name, Any]:
):
yield tuple(([k] if index else []) + list(v))

def items(self) -> Iterator[Tuple[Name, "Series"]]:
    """Alias of ``iteritems``: iterate over (column name, Series) pairs."""
    pairs = self.iteritems()
    return pairs
def iteritems(self) -> Iterator[Tuple[Name, "Series"]]:
    """
    This is an alias of ``items``.

    .. deprecated:: 3.4.0
        iteritems is deprecated and will be removed in a future version.
        Use .items instead.

    Returns
    -------
    Iterator
        The iterator of (column name, Series) pairs produced by ``items``.
    """
    # pandas deprecated DataFrame.iteritems in favor of DataFrame.items;
    # keep this alias for compatibility but steer callers to the new name.
    warnings.warn("Deprecated in 3.4.0, Use DataFrame.items instead.", FutureWarning)
    return self.items()

def to_clipboard(self, excel: bool = True, sep: Optional[str] = None, **kwargs: Any) -> None:
"""
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/pandas/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ def prepare_pandas_frame(
nullable=bool(col.isnull().any()),
),
)
for (name, col), dtype in zip(reset_index.iteritems(), index_dtypes + data_dtypes)
for (name, col), dtype in zip(reset_index.items(), index_dtypes + data_dtypes)
]

return (
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/pandas/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,7 @@ def output_func(pdf: pd.DataFrame) -> pd.DataFrame:
)

reset_index = pdf.reset_index()
for name, col in reset_index.iteritems():
for name, col in reset_index.items():
dt = col.dtype
if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
continue
Expand Down
15 changes: 11 additions & 4 deletions python/pyspark/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5951,7 +5951,7 @@ def item(self) -> Scalar:
"""
return self.head(2)._to_internal_pandas().item()

def iteritems(self) -> Iterable[Tuple[Name, Any]]:
def items(self) -> Iterable[Tuple[Name, Any]]:
"""
Lazily iterate over (index, value) tuples.
Expand Down Expand Up @@ -5998,9 +5998,16 @@ def extract_kv_from_spark_row(row: Row) -> Tuple[Name, Any]:
):
yield k, v

def items(self) -> Iterable[Tuple[Name, Any]]:
    """Alias of ``iteritems``: lazily iterate over (index, value) tuples."""
    pairs = self.iteritems()
    return pairs
def iteritems(self) -> Iterable[Tuple[Name, Any]]:
    """
    This is an alias of ``items``.

    .. deprecated:: 3.4.0
        iteritems is deprecated and will be removed in a future version.
        Use .items instead.

    Returns
    -------
    Iterable
        The iterable of (index, value) tuples produced by ``items``.
    """
    # Use "3.4.0" (not "3.4") so the message matches both the docstring's
    # ``.. deprecated:: 3.4.0`` directive and DataFrame.iteritems' warning.
    warnings.warn("Deprecated in 3.4.0, Use Series.items instead.", FutureWarning)
    return self.items()

def droplevel(self, level: Union[int, Name, List[Union[int, Name]]]) -> "Series":
"""
Expand Down
6 changes: 3 additions & 3 deletions python/pyspark/sql/pandas/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def _convert_from_pandas(
pdf[field.name] = s
else:
should_localize = not is_timestamp_ntz_preferred()
for column, series in pdf.iteritems():
for column, series in pdf.items():
s = series
if should_localize and is_datetime64tz_dtype(s.dtype) and s.dt.tz is not None:
s = _check_series_convert_timestamps_tz_local(series, timezone)
Expand All @@ -483,7 +483,7 @@ def _convert_from_pandas(
copied = True
pdf[column] = s

for column, series in pdf.iteritems():
for column, series in pdf.items():
if is_timedelta64_dtype(series):
if not copied:
pdf = pdf.copy()
Expand Down Expand Up @@ -601,7 +601,7 @@ def _create_from_pandas_with_arrow(

# Create list of Arrow (columns, type) for serializer dump_stream
arrow_data = [
[(c, t) for (_, c), t in zip(pdf_slice.iteritems(), arrow_types)]
[(c, t) for (_, c), t in zip(pdf_slice.items(), arrow_types)]
for pdf_slice in pdf_slices
]

Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/tests/test_pandas_grouped_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ def f(key, pdf):
window_range = key[1]

# Make sure the key with group and window values are correct
for _, i in pdf.id.iteritems():
for _, i in pdf.id.items():
assert expected_key[i][0] == group, "{} != {}".format(expected_key[i][0], group)
assert expected_key[i][1] == window_range, "{} != {}".format(
expected_key[i][1], window_range
Expand Down

0 comments on commit 44573a2

Please sign in to comment.